xref: /openbmc/linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision 89b15863)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <linux/virtio_config.h>
10 #include <linux/auxiliary_bus.h>
11 #include <linux/mlx5/cq.h>
12 #include <linux/mlx5/qp.h>
13 #include <linux/mlx5/device.h>
14 #include <linux/mlx5/driver.h>
15 #include <linux/mlx5/vport.h>
16 #include <linux/mlx5/fs.h>
17 #include <linux/mlx5/mlx5_ifc_vdpa.h>
18 #include "mlx5_vdpa.h"
19 
20 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
21 MODULE_DESCRIPTION("Mellanox VDPA driver");
22 MODULE_LICENSE("Dual BSD/GPL");
23 
24 #define to_mlx5_vdpa_ndev(__mvdev)                                             \
25 	container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
26 #define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)
27 
28 #define VALID_FEATURES_MASK                                                                        \
29 	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
30 	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
31 	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
32 	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
33 	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
34 	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
35 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
36 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
37 	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
38 	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
39 	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
40 	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
41 	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
42 
43 #define VALID_STATUS_MASK                                                                          \
44 	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
45 	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
46 
47 struct mlx5_vdpa_net_resources {
48 	u32 tisn;
49 	u32 tdn;
50 	u32 tirn;
51 	u32 rqtn;
52 	bool valid;
53 };
54 
55 struct mlx5_vdpa_cq_buf {
56 	struct mlx5_frag_buf_ctrl fbc;
57 	struct mlx5_frag_buf frag_buf;
58 	int cqe_size;
59 	int nent;
60 };
61 
62 struct mlx5_vdpa_cq {
63 	struct mlx5_core_cq mcq;
64 	struct mlx5_vdpa_cq_buf buf;
65 	struct mlx5_db db;
66 	int cqe;
67 };
68 
69 struct mlx5_vdpa_umem {
70 	struct mlx5_frag_buf_ctrl fbc;
71 	struct mlx5_frag_buf frag_buf;
72 	int size;
73 	u32 id;
74 };
75 
76 struct mlx5_vdpa_qp {
77 	struct mlx5_core_qp mqp;
78 	struct mlx5_frag_buf frag_buf;
79 	struct mlx5_db db;
80 	u16 head;
81 	bool fw;
82 };
83 
84 struct mlx5_vq_restore_info {
85 	u32 num_ent;
86 	u64 desc_addr;
87 	u64 device_addr;
88 	u64 driver_addr;
89 	u16 avail_index;
90 	bool ready;
91 	struct vdpa_callback cb;
92 	bool restore;
93 };
94 
95 struct mlx5_vdpa_virtqueue {
96 	bool ready;
97 	u64 desc_addr;
98 	u64 device_addr;
99 	u64 driver_addr;
100 	u32 num_ent;
101 	struct vdpa_callback event_cb;
102 
103 	/* Resources for implementing the notification channel from the device
104 	 * to the driver. fwqp is the firmware end of an RC connection; the
105 	 * other end is vqqp, which is used by the driver. cq is where
106 	 * completions are reported.
107 	 */
108 	struct mlx5_vdpa_cq cq;
109 	struct mlx5_vdpa_qp fwqp;
110 	struct mlx5_vdpa_qp vqqp;
111 
112 	/* umem resources are required for virtqueue operation. Their use is
113 	 * internal to the device, and they must be provided by the driver.
114 	 */
115 	struct mlx5_vdpa_umem umem1;
116 	struct mlx5_vdpa_umem umem2;
117 	struct mlx5_vdpa_umem umem3;
118 
119 	bool initialized;
120 	int index;
121 	u32 virtq_id;
122 	struct mlx5_vdpa_net *ndev;
123 	u16 avail_idx;
124 	int fw_state;
125 
126 	/* keep last in the struct */
127 	struct mlx5_vq_restore_info ri;
128 };
129 
130 /* We will remove this limitation once mlx5_vdpa_alloc_resources()
131  * provides for driver space allocation
132  */
133 #define MLX5_MAX_SUPPORTED_VQS 16
134 
135 struct mlx5_vdpa_net {
136 	struct mlx5_vdpa_dev mvdev;
137 	struct mlx5_vdpa_net_resources res;
138 	struct virtio_net_config config;
139 	struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
140 
141 	/* Serialize vq resources creation and destruction. This is required
142 	 * since the memory map might change and we need to destroy and
143 	 * re-create resources while the driver is operational.
144 	 */
145 	struct mutex reslock;
146 	struct mlx5_flow_table *rxft;
147 	struct mlx5_fc *rx_counter;
148 	struct mlx5_flow_handle *rx_rule;
149 	bool setup;
150 	u16 mtu;
151 };
152 
153 static void free_resources(struct mlx5_vdpa_net *ndev);
154 static void init_mvqs(struct mlx5_vdpa_net *ndev);
155 static int setup_driver(struct mlx5_vdpa_net *ndev);
156 static void teardown_driver(struct mlx5_vdpa_net *ndev);
157 
158 static bool mlx5_vdpa_debug;
159 
160 #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
161 	do {                                                                                       \
162 		if (features & BIT_ULL(_feature))                                                  \
163 			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
164 	} while (0)
165 
166 #define MLX5_LOG_VIO_STAT(_status)                                                                 \
167 	do {                                                                                       \
168 		if (status & (_status))                                                            \
169 			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
170 	} while (0)
171 
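/* Virtqueues come in RX/TX pairs, so the number of supported queue pairs is
 * half the number of virtqueues.
 */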
172 static inline u32 mlx5_vdpa_max_qps(int max_vqs)
173 {
174 	return max_vqs / 2;
175 }
176 
177 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
178 {
179 	if (status & ~VALID_STATUS_MASK)
180 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
181 			       status & ~VALID_STATUS_MASK);
182 
183 	if (!mlx5_vdpa_debug)
184 		return;
185 
186 	mlx5_vdpa_info(mvdev, "driver status %s\n", set ? "set" : "get");
187 	if (set && !status) {
188 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
189 		return;
190 	}
191 
192 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
193 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
194 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
195 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
196 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
197 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
198 }
199 
200 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
201 {
202 	if (features & ~VALID_FEATURES_MASK)
203 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
204 			       features & ~VALID_FEATURES_MASK);
205 
206 	if (!mlx5_vdpa_debug)
207 		return;
208 
209 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
210 	if (!features)
211 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
212 
213 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
214 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
215 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
216 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
217 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
218 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
219 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
220 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
221 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
222 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
223 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
224 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
225 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
226 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
227 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
228 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
229 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
230 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
231 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
232 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
233 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
234 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
235 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
236 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
237 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
238 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
239 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
240 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
241 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
242 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
243 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
244 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
245 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
246 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
247 }
248 
249 static int create_tis(struct mlx5_vdpa_net *ndev)
250 {
251 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
252 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
253 	void *tisc;
254 	int err;
255 
256 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
257 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
258 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
259 	if (err)
260 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
261 
262 	return err;
263 }
264 
265 static void destroy_tis(struct mlx5_vdpa_net *ndev)
266 {
267 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
268 }
269 
270 #define MLX5_VDPA_CQE_SIZE 64
271 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
272 
273 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
274 {
275 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
276 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
277 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
278 	int err;
279 
280 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
281 				       ndev->mvdev.mdev->priv.numa_node);
282 	if (err)
283 		return err;
284 
285 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
286 
287 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
288 	buf->nent = nent;
289 
290 	return 0;
291 }
292 
293 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
294 {
295 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
296 
297 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
298 					ndev->mvdev.mdev->priv.numa_node);
299 }
300 
301 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
302 {
303 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
304 }
305 
306 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
307 {
308 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
309 }
310 
311 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
312 {
313 	struct mlx5_cqe64 *cqe64;
314 	void *cqe;
315 	int i;
316 
317 	for (i = 0; i < buf->nent; i++) {
318 		cqe = get_cqe(vcq, i);
319 		cqe64 = cqe;
320 		cqe64->op_own = MLX5_CQE_INVALID << 4;
321 	}
322 }
323 
324 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
325 {
326 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
327 
328 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
329 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
330 		return cqe64;
331 
332 	return NULL;
333 }
334 
335 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
336 {
337 	vqp->head += n;
338 	vqp->db.db[0] = cpu_to_be32(vqp->head);
339 }
340 
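/* Fill the create_qp_in mailbox for one end of the notification channel. The
 * firmware QP only needs a zero-length RQ and no SQ; the driver QP gets a
 * real RQ whose completions are reported on the virtqueue's CQ.
 */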
341 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
342 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
343 {
344 	struct mlx5_vdpa_qp *vqp;
345 	__be64 *pas;
346 	void *qpc;
347 
348 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
349 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
350 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
351 	if (vqp->fw) {
352 		/* The firmware QP is allocated by the driver for the firmware's
353 		 * use, so we can skip some params; they will be chosen by firmware.
354 		 */
355 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
356 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
357 		MLX5_SET(qpc, qpc, no_sq, 1);
358 		return;
359 	}
360 
361 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
362 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
363 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
364 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
365 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
366 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
367 	MLX5_SET(qpc, qpc, no_sq, 1);
368 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
369 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
370 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
371 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
372 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
373 }
374 
375 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
376 {
377 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
378 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
379 					ndev->mvdev.mdev->priv.numa_node);
380 }
381 
382 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
383 {
384 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
385 }
386 
387 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
388 		     struct mlx5_vdpa_qp *vqp)
389 {
390 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
391 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
392 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
393 	void *qpc;
394 	void *in;
395 	int err;
396 
397 	if (!vqp->fw) {
398 		vqp = &mvq->vqqp;
399 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
400 		if (err)
401 			return err;
402 
403 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
404 		if (err)
405 			goto err_db;
406 		inlen += vqp->frag_buf.npages * sizeof(__be64);
407 	}
408 
409 	in = kzalloc(inlen, GFP_KERNEL);
410 	if (!in) {
411 		err = -ENOMEM;
412 		goto err_kzalloc;
413 	}
414 
415 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
416 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
417 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
418 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
419 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
420 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
421 	if (!vqp->fw)
422 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
423 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
424 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
425 	kfree(in);
426 	if (err)
427 		goto err_kzalloc;
428 
429 	vqp->mqp.uid = ndev->mvdev.res.uid;
430 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
431 
432 	if (!vqp->fw)
433 		rx_post(vqp, mvq->num_ent);
434 
435 	return 0;
436 
437 err_kzalloc:
438 	if (!vqp->fw)
439 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
440 err_db:
441 	if (!vqp->fw)
442 		rq_buf_free(ndev, vqp);
443 
444 	return err;
445 }
446 
447 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
448 {
449 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
450 
451 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
452 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
453 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
454 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
455 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
456 	if (!vqp->fw) {
457 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
458 		rq_buf_free(ndev, vqp);
459 	}
460 }
461 
462 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
463 {
464 	return get_sw_cqe(cq, cq->mcq.cons_index);
465 }
466 
467 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
468 {
469 	struct mlx5_cqe64 *cqe64;
470 
471 	cqe64 = next_cqe_sw(vcq);
472 	if (!cqe64)
473 		return -EAGAIN;
474 
475 	vcq->mcq.cons_index++;
476 	return 0;
477 }
478 
479 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
480 {
481 	mlx5_cq_set_ci(&mvq->cq.mcq);
482 	rx_post(&mvq->vqqp, num);
483 	if (mvq->event_cb.callback)
484 		mvq->event_cb.callback(mvq->event_cb.private);
485 }
486 
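/* Completion handler for a virtqueue's notification CQ: drain the available
 * CQEs, re-post receive credits on the driver QP, invoke the vdpa event
 * callback and re-arm the CQ.
 */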
487 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
488 {
489 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
490 	struct mlx5_vdpa_net *ndev = mvq->ndev;
491 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
492 	int num = 0;
493 
494 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
495 		num++;
496 		if (num > mvq->num_ent / 2) {
497 			/* If completions keep coming while we poll, we want to
498 			 * let the hardware know that we consumed them by
499 			 * updating the doorbell record. We also let the vdpa
500 			 * core know about this so it passes it on to the
501 			 * virtio driver in the guest.
502 			 */
503 			mlx5_vdpa_handle_completions(mvq, num);
504 			num = 0;
505 		}
506 	}
507 
508 	if (num)
509 		mlx5_vdpa_handle_completions(mvq, num);
510 
511 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
512 }
513 
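/* Create the completion queue backing the notification channel of virtqueue
 * 'idx'. All CQs currently share EQ vector 0 (see the comment below).
 */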
514 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
515 {
516 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
517 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
518 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
519 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
520 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
521 	unsigned int irqn;
522 	__be64 *pas;
523 	int inlen;
524 	void *cqc;
525 	void *in;
526 	int err;
527 	int eqn;
528 
529 	err = mlx5_db_alloc(mdev, &vcq->db);
530 	if (err)
531 		return err;
532 
533 	vcq->mcq.set_ci_db = vcq->db.db;
534 	vcq->mcq.arm_db = vcq->db.db + 1;
535 	vcq->mcq.cqe_sz = 64;
536 
537 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
538 	if (err)
539 		goto err_db;
540 
541 	cq_frag_buf_init(vcq, &vcq->buf);
542 
543 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
544 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
545 	in = kzalloc(inlen, GFP_KERNEL);
546 	if (!in) {
547 		err = -ENOMEM;
548 		goto err_vzalloc;
549 	}
550 
551 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
552 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
553 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
554 
555 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
556 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
557 
558 	/* Use vector 0 by default. Consider adding code to choose least used
559 	 * vector.
560 	 */
561 	err = mlx5_vector2eqn(mdev, 0, &eqn, &irqn);
562 	if (err)
563 		goto err_vec;
564 
565 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
566 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
567 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
568 	MLX5_SET(cqc, cqc, c_eqn, eqn);
569 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
570 
571 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
572 	if (err)
573 		goto err_vec;
574 
575 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
576 	vcq->cqe = num_ent;
577 	vcq->mcq.set_ci_db = vcq->db.db;
578 	vcq->mcq.arm_db = vcq->db.db + 1;
579 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
580 	kfree(in);
581 	return 0;
582 
583 err_vec:
584 	kfree(in);
585 err_vzalloc:
586 	cq_frag_buf_free(ndev, &vcq->buf);
587 err_db:
588 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
589 	return err;
590 }
591 
592 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
593 {
594 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
595 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
596 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
597 
598 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
599 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
600 		return;
601 	}
602 	cq_frag_buf_free(ndev, &vcq->buf);
603 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
604 }
605 
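/* The required size of umem 'num' is reported by the device as a linear
 * function of the queue size: size = param_a * num_ent + param_b.
 */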
606 static int umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
607 		     struct mlx5_vdpa_umem **umemp)
608 {
609 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
610 	int p_a;
611 	int p_b;
612 
613 	switch (num) {
614 	case 1:
615 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
616 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
617 		*umemp = &mvq->umem1;
618 		break;
619 	case 2:
620 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
621 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
622 		*umemp = &mvq->umem2;
623 		break;
624 	case 3:
625 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
626 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
627 		*umemp = &mvq->umem3;
628 		break;
629 	}
630 	return p_a * mvq->num_ent + p_b;
631 }
632 
633 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
634 {
635 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
636 }
637 
638 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
639 {
640 	int inlen;
641 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
642 	void *um;
643 	void *in;
644 	int err;
645 	__be64 *pas;
646 	int size;
647 	struct mlx5_vdpa_umem *umem;
648 
649 	size = umem_size(ndev, mvq, num, &umem);
650 	if (size < 0)
651 		return size;
652 
653 	umem->size = size;
654 	err = umem_frag_buf_alloc(ndev, umem, size);
655 	if (err)
656 		return err;
657 
658 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
659 
660 	in = kzalloc(inlen, GFP_KERNEL);
661 	if (!in) {
662 		err = -ENOMEM;
663 		goto err_in;
664 	}
665 
666 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
667 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
668 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
669 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
670 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
671 
672 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
673 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
674 
675 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
676 	if (err) {
677 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
678 		goto err_cmd;
679 	}
680 
681 	kfree(in);
682 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
683 
684 	return 0;
685 
686 err_cmd:
687 	kfree(in);
688 err_in:
689 	umem_frag_buf_free(ndev, umem);
690 	return err;
691 }
692 
693 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
694 {
695 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
696 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
697 	struct mlx5_vdpa_umem *umem;
698 
699 	switch (num) {
700 	case 1:
701 		umem = &mvq->umem1;
702 		break;
703 	case 2:
704 		umem = &mvq->umem2;
705 		break;
706 	case 3:
707 		umem = &mvq->umem3;
708 		break;
709 	}
710 
711 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
712 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
713 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
714 		return;
715 
716 	umem_frag_buf_free(ndev, umem);
717 }
718 
719 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
720 {
721 	int num;
722 	int err;
723 
724 	for (num = 1; num <= 3; num++) {
725 		err = create_umem(ndev, mvq, num);
726 		if (err)
727 			goto err_umem;
728 	}
729 	return 0;
730 
731 err_umem:
732 	for (num--; num > 0; num--)
733 		umem_destroy(ndev, mvq, num);
734 
735 	return err;
736 }
737 
738 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
739 {
740 	int num;
741 
742 	for (num = 3; num > 0; num--)
743 		umem_destroy(ndev, mvq, num);
744 }
745 
746 static int get_queue_type(struct mlx5_vdpa_net *ndev)
747 {
748 	u32 type_mask;
749 
750 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
751 
752 	/* prefer split queue */
753 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
754 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
755 
756 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
757 
758 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
759 }
760 
761 static bool vq_is_tx(u16 idx)
762 {
763 	return idx % 2;
764 }
765 
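/* Pack the checksum/TSO feature bits into the layout expected by the
 * queue_feature_bit_mask_12_3 field of the virtio_net_q object.
 */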
766 static u16 get_features_12_3(u64 features)
767 {
768 	return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
769 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
770 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
771 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
772 }
773 
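/* Create the VIRTIO_NET_Q object in firmware. The umems backing the queue,
 * the event QP of the notification channel and the memory key covering the
 * guest mappings are all referenced from the queue context.
 */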
774 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
775 {
776 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
777 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
778 	void *obj_context;
779 	void *cmd_hdr;
780 	void *vq_ctx;
781 	void *in;
782 	int err;
783 
784 	err = umems_create(ndev, mvq);
785 	if (err)
786 		return err;
787 
788 	in = kzalloc(inlen, GFP_KERNEL);
789 	if (!in) {
790 		err = -ENOMEM;
791 		goto err_alloc;
792 	}
793 
794 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
795 
796 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
797 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
798 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
799 
800 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
801 	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
802 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
803 		 get_features_12_3(ndev->mvdev.actual_features));
804 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
805 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
806 
807 	if (vq_is_tx(mvq->index))
808 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
809 
810 	MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
811 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
812 	MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
813 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
814 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
815 		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
816 	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
817 	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
818 	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
819 	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key);
820 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
821 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
822 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
823 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
824 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
825 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
826 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
827 	if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
828 		MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);
829 
830 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
831 	if (err)
832 		goto err_cmd;
833 
834 	kfree(in);
835 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
836 
837 	return 0;
838 
839 err_cmd:
840 	kfree(in);
841 err_alloc:
842 	umems_destroy(ndev, mvq);
843 	return err;
844 }
845 
846 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
847 {
848 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
849 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
850 
851 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
852 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
853 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
854 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
855 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
856 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
857 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
858 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
859 		return;
860 	}
861 	umems_destroy(ndev, mvq);
862 }
863 
864 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
865 {
866 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
867 }
868 
869 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
870 {
871 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
872 }
873 
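/* Build the command mailboxes for one step of the QP state transition
 * (2RST, RST2INIT, INIT2RTR or RTR2RTS). On allocation failure both *in and
 * *out are set to NULL so the caller can detect the error.
 */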
874 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
875 			int *outlen, u32 qpn, u32 rqpn)
876 {
877 	void *qpc;
878 	void *pp;
879 
880 	switch (cmd) {
881 	case MLX5_CMD_OP_2RST_QP:
882 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
883 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
884 		*in = kzalloc(*inlen, GFP_KERNEL);
885 		*out = kzalloc(*outlen, GFP_KERNEL);
886 		if (!*in || !*out)
887 			goto outerr;
888 
889 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
890 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
891 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
892 		break;
893 	case MLX5_CMD_OP_RST2INIT_QP:
894 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
895 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
896 		*in = kzalloc(*inlen, GFP_KERNEL);
897 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
898 		if (!*in || !*out)
899 			goto outerr;
900 
901 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
902 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
903 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
904 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
905 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
906 		MLX5_SET(qpc, qpc, rwe, 1);
907 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
908 		MLX5_SET(ads, pp, vhca_port_num, 1);
909 		break;
910 	case MLX5_CMD_OP_INIT2RTR_QP:
911 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
912 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
913 		*in = kzalloc(*inlen, GFP_KERNEL);
914 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
915 		if (!*in || !*out)
916 			goto outerr;
917 
918 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
919 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
920 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
921 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
922 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
923 		MLX5_SET(qpc, qpc, log_msg_max, 30);
924 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
925 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
926 		MLX5_SET(ads, pp, fl, 1);
927 		break;
928 	case MLX5_CMD_OP_RTR2RTS_QP:
929 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
930 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
931 		*in = kzalloc(*inlen, GFP_KERNEL);
932 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
933 		if (!*in || !*out)
934 			goto outerr;
935 
936 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
937 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
938 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
939 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
940 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
941 		MLX5_SET(ads, pp, ack_timeout, 14);
942 		MLX5_SET(qpc, qpc, retry_count, 7);
943 		MLX5_SET(qpc, qpc, rnr_retry, 7);
944 		break;
945 	default:
946 		goto outerr_nullify;
947 	}
948 
949 	return;
950 
951 outerr:
952 	kfree(*in);
953 	kfree(*out);
954 outerr_nullify:
955 	*in = NULL;
956 	*out = NULL;
957 }
958 
959 static void free_inout(void *in, void *out)
960 {
961 	kfree(in);
962 	kfree(out);
963 }
964 
965 /* Two QPs are used by each virtqueue. One is used by the driver and one by
966  * the firmware. The fw argument indicates whether the QP being modified is
967  * the one used by the firmware.
968  */
969 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
970 {
971 	int outlen;
972 	int inlen;
973 	void *out;
974 	void *in;
975 	int err;
976 
977 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
978 	if (!in || !out)
979 		return -ENOMEM;
980 
981 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
982 	free_inout(in, out);
983 	return err;
984 }
985 
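/* Reset both QPs, then move them through INIT and RTR, and finally bring the
 * firmware QP to RTS. Each QP's remote_qpn points at its peer, forming the
 * RC loopback connection used for notifications.
 */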
986 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
987 {
988 	int err;
989 
990 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
991 	if (err)
992 		return err;
993 
994 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
995 	if (err)
996 		return err;
997 
998 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
999 	if (err)
1000 		return err;
1001 
1002 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1003 	if (err)
1004 		return err;
1005 
1006 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1007 	if (err)
1008 		return err;
1009 
1010 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1011 	if (err)
1012 		return err;
1013 
1014 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1015 }
1016 
1017 struct mlx5_virtq_attr {
1018 	u8 state;
1019 	u16 available_index;
1020 };
1021 
1022 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1023 			   struct mlx5_virtq_attr *attr)
1024 {
1025 	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1026 	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1027 	void *out;
1028 	void *obj_context;
1029 	void *cmd_hdr;
1030 	int err;
1031 
1032 	out = kzalloc(outlen, GFP_KERNEL);
1033 	if (!out)
1034 		return -ENOMEM;
1035 
1036 	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1037 
1038 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1039 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1040 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1041 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1042 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1043 	if (err)
1044 		goto err_cmd;
1045 
1046 	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1047 	memset(attr, 0, sizeof(*attr));
1048 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1049 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1050 	kfree(out);
1051 	return 0;
1052 
1053 err_cmd:
1054 	kfree(out);
1055 	return err;
1056 }
1057 
1058 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1059 {
1060 	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1061 	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1062 	void *obj_context;
1063 	void *cmd_hdr;
1064 	void *in;
1065 	int err;
1066 
1067 	in = kzalloc(inlen, GFP_KERNEL);
1068 	if (!in)
1069 		return -ENOMEM;
1070 
1071 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1072 
1073 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1074 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1075 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1076 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1077 
1078 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1079 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1080 		   MLX5_VIRTQ_MODIFY_MASK_STATE);
1081 	MLX5_SET(virtio_net_q_object, obj_context, state, state);
1082 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1083 	kfree(in);
1084 	if (!err)
1085 		mvq->fw_state = state;
1086 
1087 	return err;
1088 }
1089 
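/* Create all resources needed by a single virtqueue: the completion queue,
 * the firmware and driver QPs of the notification channel and the firmware
 * virtqueue object itself, moving the latter to ready if requested.
 */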
1090 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1091 {
1092 	u16 idx = mvq->index;
1093 	int err;
1094 
1095 	if (!mvq->num_ent)
1096 		return 0;
1097 
1098 	if (mvq->initialized) {
1099 		mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n");
1100 		return -EINVAL;
1101 	}
1102 
1103 	err = cq_create(ndev, idx, mvq->num_ent);
1104 	if (err)
1105 		return err;
1106 
1107 	err = qp_create(ndev, mvq, &mvq->fwqp);
1108 	if (err)
1109 		goto err_fwqp;
1110 
1111 	err = qp_create(ndev, mvq, &mvq->vqqp);
1112 	if (err)
1113 		goto err_vqqp;
1114 
1115 	err = connect_qps(ndev, mvq);
1116 	if (err)
1117 		goto err_connect;
1118 
1119 	err = create_virtqueue(ndev, mvq);
1120 	if (err)
1121 		goto err_connect;
1122 
1123 	if (mvq->ready) {
1124 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1125 		if (err) {
1126 			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1127 				       idx, err);
1128 			goto err_connect;
1129 		}
1130 	}
1131 
1132 	mvq->initialized = true;
1133 	return 0;
1134 
1135 err_connect:
1136 	qp_destroy(ndev, &mvq->vqqp);
1137 err_vqqp:
1138 	qp_destroy(ndev, &mvq->fwqp);
1139 err_fwqp:
1140 	cq_destroy(ndev, idx);
1141 	return err;
1142 }
1143 
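/* Move a ready virtqueue to the SUSPEND state and record its hardware
 * available index so it can be reported or restored later.
 */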
1144 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1145 {
1146 	struct mlx5_virtq_attr attr;
1147 
1148 	if (!mvq->initialized)
1149 		return;
1150 
1151 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1152 		return;
1153 
1154 	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1155 		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1156 
1157 	if (query_virtqueue(ndev, mvq, &attr)) {
1158 		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1159 		return;
1160 	}
1161 	mvq->avail_idx = attr.available_index;
1162 }
1163 
1164 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1165 {
1166 	int i;
1167 
1168 	for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
1169 		suspend_vq(ndev, &ndev->vqs[i]);
1170 }
1171 
1172 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1173 {
1174 	if (!mvq->initialized)
1175 		return;
1176 
1177 	suspend_vq(ndev, mvq);
1178 	destroy_virtqueue(ndev, mvq);
1179 	qp_destroy(ndev, &mvq->vqqp);
1180 	qp_destroy(ndev, &mvq->fwqp);
1181 	cq_destroy(ndev, mvq->index);
1182 	mvq->initialized = false;
1183 }
1184 
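/* Create an RQ table listing the initialized receive virtqueues. The TIR
 * created later uses this table as its indirection table.
 */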
1185 static int create_rqt(struct mlx5_vdpa_net *ndev)
1186 {
1187 	int log_max_rqt;
1188 	__be32 *list;
1189 	void *rqtc;
1190 	int inlen;
1191 	void *in;
1192 	int i, j;
1193 	int err;
1194 
1195 	log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
1196 	if (log_max_rqt < 1)
1197 		return -EOPNOTSUPP;
1198 
1199 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num);
1200 	in = kzalloc(inlen, GFP_KERNEL);
1201 	if (!in)
1202 		return -ENOMEM;
1203 
1204 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1205 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1206 
1207 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1208 	MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt);
1209 	MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
1210 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1211 	for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) {
1212 		if (!ndev->vqs[j].initialized)
1213 			continue;
1214 
1215 		if (!vq_is_tx(ndev->vqs[j].index)) {
1216 			list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
1217 			i++;
1218 		}
1219 	}
1220 
1221 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1222 	kfree(in);
1223 	if (err)
1224 		return err;
1225 
1226 	return 0;
1227 }
1228 
1229 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1230 {
1231 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1232 }
1233 
1234 static int create_tir(struct mlx5_vdpa_net *ndev)
1235 {
1236 #define HASH_IP_L4PORTS                                                                            \
1237 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1238 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1239 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1240 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1241 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1242 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1243 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1244 	void *rss_key;
1245 	void *outer;
1246 	void *tirc;
1247 	void *in;
1248 	int err;
1249 
1250 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1251 	if (!in)
1252 		return -ENOMEM;
1253 
1254 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1255 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1256 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1257 
1258 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1259 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1260 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1261 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1262 
1263 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1264 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1265 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1266 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1267 
1268 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1269 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1270 
1271 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1272 	kfree(in);
1273 	return err;
1274 }
1275 
1276 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1277 {
1278 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1279 }
1280 
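/* Install a catch-all flow rule steering all incoming packets to the TIR,
 * with a flow counter attached for statistics.
 */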
1281 static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1282 {
1283 	struct mlx5_flow_destination dest[2] = {};
1284 	struct mlx5_flow_table_attr ft_attr = {};
1285 	struct mlx5_flow_act flow_act = {};
1286 	struct mlx5_flow_namespace *ns;
1287 	int err;
1288 
1289 	/* for now, one entry, match all, forward to tir */
1290 	ft_attr.max_fte = 1;
1291 	ft_attr.autogroup.max_num_groups = 1;
1292 
1293 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1294 	if (!ns) {
1295 		mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n");
1296 		return -EOPNOTSUPP;
1297 	}
1298 
1299 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1300 	if (IS_ERR(ndev->rxft))
1301 		return PTR_ERR(ndev->rxft);
1302 
1303 	ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1304 	if (IS_ERR(ndev->rx_counter)) {
1305 		err = PTR_ERR(ndev->rx_counter);
1306 		goto err_fc;
1307 	}
1308 
1309 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
1310 	dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1311 	dest[0].tir_num = ndev->res.tirn;
1312 	dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1313 	dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
1314 	ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2);
1315 	if (IS_ERR(ndev->rx_rule)) {
1316 		err = PTR_ERR(ndev->rx_rule);
1317 		ndev->rx_rule = NULL;
1318 		goto err_rule;
1319 	}
1320 
1321 	return 0;
1322 
1323 err_rule:
1324 	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1325 err_fc:
1326 	mlx5_destroy_flow_table(ndev->rxft);
1327 	return err;
1328 }
1329 
1330 static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1331 {
1332 	if (!ndev->rx_rule)
1333 		return;
1334 
1335 	mlx5_del_flow_rules(ndev->rx_rule);
1336 	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1337 	mlx5_destroy_flow_table(ndev->rxft);
1338 
1339 	ndev->rx_rule = NULL;
1340 }
1341 
1342 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
1343 {
1344 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1345 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1346 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1347 
1348 	if (unlikely(!mvq->ready))
1349 		return;
1350 
1351 	iowrite16(idx, ndev->mvdev.res.kick_addr);
1352 }
1353 
1354 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1355 				    u64 driver_area, u64 device_area)
1356 {
1357 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1358 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1359 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1360 
1361 	mvq->desc_addr = desc_area;
1362 	mvq->device_addr = device_area;
1363 	mvq->driver_addr = driver_area;
1364 	return 0;
1365 }
1366 
1367 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
1368 {
1369 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1370 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1371 	struct mlx5_vdpa_virtqueue *mvq;
1372 
1373 	mvq = &ndev->vqs[idx];
1374 	mvq->num_ent = num;
1375 }
1376 
1377 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
1378 {
1379 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1380 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1381 	struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx];
1382 
1383 	vq->event_cb = *cb;
1384 }
1385 
1386 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
1387 {
1388 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1389 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1390 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1391 
1392 	if (!ready)
1393 		suspend_vq(ndev, mvq);
1394 
1395 	mvq->ready = ready;
1396 }
1397 
1398 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
1399 {
1400 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1401 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1402 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1403 
1404 	return mvq->ready;
1405 }
1406 
1407 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
1408 				  const struct vdpa_vq_state *state)
1409 {
1410 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1411 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1412 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1413 
1414 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
1415 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
1416 		return -EINVAL;
1417 	}
1418 
1419 	mvq->avail_idx = state->avail_index;
1420 	return 0;
1421 }
1422 
1423 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
1424 {
1425 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1426 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1427 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1428 	struct mlx5_virtq_attr attr;
1429 	int err;
1430 
1431 	/* If the virtq object was destroyed, use the value saved at
1432 	 * the last minute of suspend_vq. This caters for userspace
1433 	 * that cares about emulating the index after vq is stopped.
1434 	 */
1435 	if (!mvq->initialized) {
1436 		state->avail_index = mvq->avail_idx;
1437 		return 0;
1438 	}
1439 
1440 	err = query_virtqueue(ndev, mvq, &attr);
1441 	if (err) {
1442 		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
1443 		return err;
1444 	}
1445 	state->avail_index = attr.available_index;
1446 	return 0;
1447 }
1448 
1449 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
1450 {
1451 	return PAGE_SIZE;
1452 }
1453 
1454 enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
1455 	MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
1456 	MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
1457 	MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
1458 };
1459 
1460 static u64 mlx_to_vritio_features(u16 dev_features)
1461 {
1462 	u64 result = 0;
1463 
1464 	if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
1465 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
1466 	if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
1467 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
1468 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
1469 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
1470 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
1471 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
1472 
1473 	return result;
1474 }
1475 
1476 static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
1477 {
1478 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1479 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1480 	u16 dev_features;
1481 
1482 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
1483 	ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features);
1484 	if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
1485 		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
1486 	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
1487 	print_features(mvdev, ndev->mvdev.mlx_features, false);
1488 	return ndev->mvdev.mlx_features;
1489 }
1490 
1491 static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
1492 {
1493 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1494 		return -EOPNOTSUPP;
1495 
1496 	return 0;
1497 }
1498 
1499 static int setup_virtqueues(struct mlx5_vdpa_net *ndev)
1500 {
1501 	int err;
1502 	int i;
1503 
1504 	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) {
1505 		err = setup_vq(ndev, &ndev->vqs[i]);
1506 		if (err)
1507 			goto err_vq;
1508 	}
1509 
1510 	return 0;
1511 
1512 err_vq:
1513 	for (--i; i >= 0; i--)
1514 		teardown_vq(ndev, &ndev->vqs[i]);
1515 
1516 	return err;
1517 }
1518 
1519 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
1520 {
1521 	struct mlx5_vdpa_virtqueue *mvq;
1522 	int i;
1523 
1524 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
1525 		mvq = &ndev->vqs[i];
1526 		if (!mvq->initialized)
1527 			continue;
1528 
1529 		teardown_vq(ndev, mvq);
1530 	}
1531 }
1532 
1533 /* TODO: cross-endian support */
1534 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
1535 {
1536 	return virtio_legacy_is_little_endian() ||
1537 		(mvdev->actual_features & (1ULL << VIRTIO_F_VERSION_1));
1538 }
1539 
1540 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
1541 {
1542 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
1543 }
1544 
1545 static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
1546 {
1547 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1548 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1549 	int err;
1550 
1551 	print_features(mvdev, features, true);
1552 
1553 	err = verify_min_features(mvdev, features);
1554 	if (err)
1555 		return err;
1556 
1557 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
1558 	ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
1559 	ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
1560 	return err;
1561 }
1562 
1563 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
1564 {
1565 	/* not implemented */
1566 	mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n");
1567 }
1568 
1569 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
1570 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
1571 {
1572 	return MLX5_VDPA_MAX_VQ_ENTRIES;
1573 }
1574 
1575 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
1576 {
1577 	return VIRTIO_ID_NET;
1578 }
1579 
1580 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
1581 {
1582 	return PCI_VENDOR_ID_MELLANOX;
1583 }
1584 
1585 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
1586 {
1587 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1588 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1589 
1590 	print_status(mvdev, ndev->mvdev.status, false);
1591 	return ndev->mvdev.status;
1592 }
1593 
1594 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1595 {
1596 	struct mlx5_vq_restore_info *ri = &mvq->ri;
1597 	struct mlx5_virtq_attr attr;
1598 	int err;
1599 
1600 	if (!mvq->initialized)
1601 		return 0;
1602 
1603 	err = query_virtqueue(ndev, mvq, &attr);
1604 	if (err)
1605 		return err;
1606 
1607 	ri->avail_index = attr.available_index;
1608 	ri->ready = mvq->ready;
1609 	ri->num_ent = mvq->num_ent;
1610 	ri->desc_addr = mvq->desc_addr;
1611 	ri->device_addr = mvq->device_addr;
1612 	ri->driver_addr = mvq->driver_addr;
1613 	ri->cb = mvq->event_cb;
1614 	ri->restore = true;
1615 	return 0;
1616 }
1617 
1618 static int save_channels_info(struct mlx5_vdpa_net *ndev)
1619 {
1620 	int i;
1621 
1622 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
1623 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
1624 		save_channel_info(ndev, &ndev->vqs[i]);
1625 	}
1626 	return 0;
1627 }
1628 
1629 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
1630 {
1631 	int i;
1632 
1633 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1634 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1635 }
1636 
1637 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
1638 {
1639 	struct mlx5_vdpa_virtqueue *mvq;
1640 	struct mlx5_vq_restore_info *ri;
1641 	int i;
1642 
1643 	mlx5_clear_vqs(ndev);
1644 	init_mvqs(ndev);
1645 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
1646 		mvq = &ndev->vqs[i];
1647 		ri = &mvq->ri;
1648 		if (!ri->restore)
1649 			continue;
1650 
1651 		mvq->avail_idx = ri->avail_index;
1652 		mvq->ready = ri->ready;
1653 		mvq->num_ent = ri->num_ent;
1654 		mvq->desc_addr = ri->desc_addr;
1655 		mvq->device_addr = ri->device_addr;
1656 		mvq->driver_addr = ri->driver_addr;
1657 		mvq->event_cb = ri->cb;
1658 	}
1659 }
1660 
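/* The memory map has changed: save virtqueue state, tear down the data path,
 * re-create the memory key for the new map and, if the device is in
 * DRIVER_OK, restore the virtqueues on top of it.
 */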
1661 static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb)
1662 {
1663 	int err;
1664 
1665 	suspend_vqs(ndev);
1666 	err = save_channels_info(ndev);
1667 	if (err)
1668 		goto err_mr;
1669 
1670 	teardown_driver(ndev);
1671 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
1672 	err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb);
1673 	if (err)
1674 		goto err_mr;
1675 
1676 	if (!(ndev->mvdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
1677 		return 0;
1678 
1679 	restore_channels_info(ndev);
1680 	err = setup_driver(ndev);
1681 	if (err)
1682 		goto err_setup;
1683 
1684 	return 0;
1685 
1686 err_setup:
1687 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
1688 err_mr:
1689 	return err;
1690 }
1691 
1692 static int setup_driver(struct mlx5_vdpa_net *ndev)
1693 {
1694 	int err;
1695 
1696 	mutex_lock(&ndev->reslock);
1697 	if (ndev->setup) {
1698 		mlx5_vdpa_warn(&ndev->mvdev, "setup driver called for already setup driver\n");
1699 		err = 0;
1700 		goto out;
1701 	}
1702 	err = setup_virtqueues(ndev);
1703 	if (err) {
1704 		mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues\n");
1705 		goto out;
1706 	}
1707 
1708 	err = create_rqt(ndev);
1709 	if (err) {
1710 		mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n");
1711 		goto err_rqt;
1712 	}
1713 
1714 	err = create_tir(ndev);
1715 	if (err) {
1716 		mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n");
1717 		goto err_tir;
1718 	}
1719 
1720 	err = add_fwd_to_tir(ndev);
1721 	if (err) {
1722 		mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir\n");
1723 		goto err_fwd;
1724 	}
1725 	ndev->setup = true;
1726 	mutex_unlock(&ndev->reslock);
1727 
1728 	return 0;
1729 
1730 err_fwd:
1731 	destroy_tir(ndev);
1732 err_tir:
1733 	destroy_rqt(ndev);
1734 err_rqt:
1735 	teardown_virtqueues(ndev);
1736 out:
1737 	mutex_unlock(&ndev->reslock);
1738 	return err;
1739 }
1740 
1741 static void teardown_driver(struct mlx5_vdpa_net *ndev)
1742 {
1743 	mutex_lock(&ndev->reslock);
1744 	if (!ndev->setup)
1745 		goto out;
1746 
1747 	remove_fwd_to_tir(ndev);
1748 	destroy_tir(ndev);
1749 	destroy_rqt(ndev);
1750 	teardown_virtqueues(ndev);
1751 	ndev->setup = false;
1752 out:
1753 	mutex_unlock(&ndev->reslock);
1754 }
1755 
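/* vdpa status handler. Writing zero resets the device; setting DRIVER_OK
 * triggers creation of the data path resources.
 */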
1756 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
1757 {
1758 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1759 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1760 	int err;
1761 
1762 	print_status(mvdev, status, true);
1763 	if (!status) {
1764 		mlx5_vdpa_info(mvdev, "performing device reset\n");
1765 		teardown_driver(ndev);
1766 		mlx5_vdpa_destroy_mr(&ndev->mvdev);
1767 		ndev->mvdev.status = 0;
1768 		ndev->mvdev.mlx_features = 0;
1769 		++mvdev->generation;
1770 		return;
1771 	}
1772 
1773 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
1774 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
1775 			err = setup_driver(ndev);
1776 			if (err) {
1777 				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
1778 				goto err_setup;
1779 			}
1780 		} else {
1781 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
1782 			return;
1783 		}
1784 	}
1785 
1786 	ndev->mvdev.status = status;
1787 	return;
1788 
1789 err_setup:
1790 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
1791 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
1792 }
1793 
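/* Copy a chunk of the virtio-net config space, bounds-checked against its size. */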
1794 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
1795 				 unsigned int len)
1796 {
1797 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1798 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1799 
1800 	if (offset + len <= sizeof(struct virtio_net_config))
1801 		memcpy(buf, (u8 *)&ndev->config + offset, len);
1802 }
1803 
1804 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
1805 				 unsigned int len)
1806 {
1807 	/* not supported */
1808 }
1809 
1810 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
1811 {
1812 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1813 
1814 	return mvdev->generation;
1815 }
1816 
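/*
 * vdpa set_map callback. Update the memory key from the iotlb and, if
 * the mapping actually changed, rebuild the data path through
 * mlx5_vdpa_change_map().
 */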
1817 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
1818 {
1819 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1820 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1821 	bool change_map;
1822 	int err;
1823 
1824 	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
1825 	if (err) {
1826 		mlx5_vdpa_warn(mvdev, "set map failed (%d)\n", err);
1827 		return err;
1828 	}
1829 
1830 	if (change_map)
1831 		return mlx5_vdpa_change_map(ndev, iotlb);
1832 
1833 	return 0;
1834 }
1835 
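/*
 * Release the resources allocated at probe time; invoked by the vdpa
 * core once the device is unregistered and its last reference dropped.
 */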
1836 static void mlx5_vdpa_free(struct vdpa_device *vdev)
1837 {
1838 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1839 	struct mlx5_vdpa_net *ndev;
1840 
1841 	ndev = to_mlx5_vdpa_ndev(mvdev);
1842 
1843 	free_resources(ndev);
1844 	mlx5_vdpa_free_resources(&ndev->mvdev);
1845 	mutex_destroy(&ndev->reslock);
1846 }
1847 
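/* Doorbell pass-through and per-virtqueue interrupts are not supported. */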
1848 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
1849 {
1850 	struct vdpa_notification_area ret = {};
1851 
1852 	return ret;
1853 }
1854 
1855 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
1856 {
1857 	return -EOPNOTSUPP;
1858 }
1859 
1860 static const struct vdpa_config_ops mlx5_vdpa_ops = {
1861 	.set_vq_address = mlx5_vdpa_set_vq_address,
1862 	.set_vq_num = mlx5_vdpa_set_vq_num,
1863 	.kick_vq = mlx5_vdpa_kick_vq,
1864 	.set_vq_cb = mlx5_vdpa_set_vq_cb,
1865 	.set_vq_ready = mlx5_vdpa_set_vq_ready,
1866 	.get_vq_ready = mlx5_vdpa_get_vq_ready,
1867 	.set_vq_state = mlx5_vdpa_set_vq_state,
1868 	.get_vq_state = mlx5_vdpa_get_vq_state,
1869 	.get_vq_notification = mlx5_get_vq_notification,
1870 	.get_vq_irq = mlx5_get_vq_irq,
1871 	.get_vq_align = mlx5_vdpa_get_vq_align,
1872 	.get_features = mlx5_vdpa_get_features,
1873 	.set_features = mlx5_vdpa_set_features,
1874 	.set_config_cb = mlx5_vdpa_set_config_cb,
1875 	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
1876 	.get_device_id = mlx5_vdpa_get_device_id,
1877 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
1878 	.get_status = mlx5_vdpa_get_status,
1879 	.set_status = mlx5_vdpa_set_status,
1880 	.get_config = mlx5_vdpa_get_config,
1881 	.set_config = mlx5_vdpa_set_config,
1882 	.get_generation = mlx5_vdpa_get_generation,
1883 	.set_map = mlx5_vdpa_set_map,
1884 	.free = mlx5_vdpa_free,
1885 };
1886 
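/* Allocate the transport domain and create the TIS used by the send virtqueues. */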
1887 static int alloc_resources(struct mlx5_vdpa_net *ndev)
1888 {
1889 	struct mlx5_vdpa_net_resources *res = &ndev->res;
1890 	int err;
1891 
1892 	if (res->valid) {
1893 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
1894 		return -EEXIST;
1895 	}
1896 
1897 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
1898 	if (err)
1899 		return err;
1900 
1901 	err = create_tis(ndev);
1902 	if (err)
1903 		goto err_tis;
1904 
1905 	res->valid = true;
1906 
1907 	return 0;
1908 
1909 err_tis:
1910 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
1911 	return err;
1912 }
1913 
1914 static void free_resources(struct mlx5_vdpa_net *ndev)
1915 {
1916 	struct mlx5_vdpa_net_resources *res = &ndev->res;
1917 
1918 	if (!res->valid)
1919 		return;
1920 
1921 	destroy_tis(ndev);
1922 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
1923 	res->valid = false;
1924 }
1925 
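/*
 * Initialize the virtqueue array. The first 2 * max_qps entries are data
 * virtqueues whose companion firmware QP is marked as firmware owned;
 * any remaining entries are only cleared and indexed.
 */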
1926 static void init_mvqs(struct mlx5_vdpa_net *ndev)
1927 {
1928 	struct mlx5_vdpa_virtqueue *mvq;
1929 	int i;
1930 
1931 	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
1932 		mvq = &ndev->vqs[i];
1933 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1934 		mvq->index = i;
1935 		mvq->ndev = ndev;
1936 		mvq->fwqp.fw = true;
1937 	}
1938 	for (; i < ndev->mvdev.max_vqs; i++) {
1939 		mvq = &ndev->vqs[i];
1940 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1941 		mvq->index = i;
1942 		mvq->ndev = ndev;
1943 	}
1944 }
1945 
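/*
 * Auxiliary bus probe: allocate the vdpa device, query the MTU and MAC
 * address of the NIC vport, allocate core and network resources and
 * register the device with the vdpa bus.
 */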
1946 static int mlx5v_probe(struct auxiliary_device *adev,
1947 		       const struct auxiliary_device_id *id)
1948 {
1949 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
1950 	struct mlx5_core_dev *mdev = madev->mdev;
1951 	struct virtio_net_config *config;
1952 	struct mlx5_vdpa_dev *mvdev;
1953 	struct mlx5_vdpa_net *ndev;
1954 	u32 max_vqs;
1955 	int err;
1956 
1957 	/* we reserve one virtqueue for the control virtqueue, should we require it */
1958 	max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
1959 	max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
1960 
1961 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
1962 				 2 * mlx5_vdpa_max_qps(max_vqs));
1963 	if (IS_ERR(ndev))
1964 		return PTR_ERR(ndev);
1965 
1966 	ndev->mvdev.max_vqs = max_vqs;
1967 	mvdev = &ndev->mvdev;
1968 	mvdev->mdev = mdev;
1969 	init_mvqs(ndev);
1970 	mutex_init(&ndev->reslock);
1971 	config = &ndev->config;
1972 	err = mlx5_query_nic_vport_mtu(mdev, &ndev->mtu);
1973 	if (err)
1974 		goto err_mtu;
1975 
1976 	err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
1977 	if (err)
1978 		goto err_mtu;
1979 
1980 	mvdev->vdev.dma_dev = mdev->device;
1981 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
1982 	if (err)
1983 		goto err_mtu;
1984 
1985 	err = alloc_resources(ndev);
1986 	if (err)
1987 		goto err_res;
1988 
1989 	err = vdpa_register_device(&mvdev->vdev);
1990 	if (err)
1991 		goto err_reg;
1992 
1993 	dev_set_drvdata(&adev->dev, mvdev);
1994 	return 0;
1995 
1996 err_reg:
1997 	free_resources(ndev);
1998 err_res:
1999 	mlx5_vdpa_free_resources(&ndev->mvdev);
2000 err_mtu:
2001 	mutex_destroy(&ndev->reslock);
2002 	put_device(&mvdev->vdev.dev);
2003 	return err;
2004 }
2005 
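/*
 * Auxiliary bus remove: unregister the vdpa device; mlx5_vdpa_free()
 * releases the remaining resources when the device is released.
 */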
2006 static void mlx5v_remove(struct auxiliary_device *adev)
2007 {
2008 	struct mlx5_vdpa_dev *mvdev = dev_get_drvdata(&adev->dev);
2009 
2010 	vdpa_unregister_device(&mvdev->vdev);
2011 }
2012 
2013 static const struct auxiliary_device_id mlx5v_id_table[] = {
2014 	{ .name = MLX5_ADEV_NAME ".vnet", },
2015 	{},
2016 };
2017 
2018 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
2019 
2020 static struct auxiliary_driver mlx5v_driver = {
2021 	.name = "vnet",
2022 	.probe = mlx5v_probe,
2023 	.remove = mlx5v_remove,
2024 	.id_table = mlx5v_id_table,
2025 };
2026 
2027 module_auxiliary_driver(mlx5v_driver);
2028