xref: /openbmc/linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision 852a53a0)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/vdpa.h>
5 #include <uapi/linux/virtio_ids.h>
6 #include <linux/virtio_config.h>
7 #include <linux/mlx5/qp.h>
8 #include <linux/mlx5/device.h>
9 #include <linux/mlx5/vport.h>
10 #include <linux/mlx5/fs.h>
11 #include <linux/mlx5/device.h>
12 #include "mlx5_vnet.h"
13 #include "mlx5_vdpa_ifc.h"
14 #include "mlx5_vdpa.h"
15 
/* Map a pointer to an embedded struct vdpa_device (the 'vdev' member) back to
 * the struct mlx5_vdpa_dev that contains it.
 */
#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)
17 
/* Every virtio feature bit this driver knows by name; print_features() warns
 * about any bit outside this mask.
 *
 * Feature words are 64 bits wide and several of these bit numbers are >= 32,
 * so the mask is built with BIT_ULL() rather than BIT(): BIT() yields an
 * unsigned long, which is too narrow for those bits on 32-bit builds.
 */
#define VALID_FEATURES_MASK                                                                        \
	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                           \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) |                   \
	 BIT_ULL(VIRTIO_NET_F_MAC) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |                            \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |                      \
	 BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |                       \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) |                        \
	 BIT_ULL(VIRTIO_NET_F_HOST_UFO) | BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |                        \
	 BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |                            \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                         \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |              \
	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |                          \
	 BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | BIT_ULL(VIRTIO_NET_F_RSS) |                           \
	 BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |                           \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                  \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) |                              \
	 BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | BIT_ULL(VIRTIO_F_RING_PACKED) |                       \
	 BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
32 
/* All virtio device-status bits this driver recognizes; print_status() warns
 * about any bit outside this mask.
 */
#define VALID_STATUS_MASK                                                                          \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
36 
/* Device-wide transport object numbers owned by a mlx5_vdpa_net instance. */
struct mlx5_vdpa_net_resources {
	/* TIS number: filled in by create_tis(), released by destroy_tis() and
	 * programmed as tisn_or_qpn for TX virtqueues.
	 */
	u32 tisn;
	/* Transport domain number written into the TIS context. */
	u32 tdn;
	/* NOTE(review): tirn/rqtn are not referenced in this part of the file;
	 * presumably the TIR and RQT numbers - confirm against their users.
	 */
	u32 tirn;
	u32 rqtn;
	/* NOTE(review): presumably set once the resources above exist - confirm. */
	bool valid;
};
44 
/* Backing storage for a completion queue; set up by cq_frag_buf_alloc(). */
struct mlx5_vdpa_cq_buf {
	/* Fragment-buffer control block, initialized with mlx5_init_fbc() and
	 * used by get_cqe() to locate an entry.
	 */
	struct mlx5_frag_buf_ctrl fbc;
	/* The fragmented buffer holding the CQEs. */
	struct mlx5_frag_buf frag_buf;
	/* Size of one CQE in bytes (MLX5_VDPA_CQE_SIZE). */
	int cqe_size;
	/* Number of CQEs the buffer was allocated for. */
	int nent;
};
51 
/* Completion queue used for virtqueue notifications. */
struct mlx5_vdpa_cq {
	/* Core CQ object; cq_create() points its set_ci_db/arm_db at 'db'. */
	struct mlx5_core_cq mcq;
	/* CQE storage. */
	struct mlx5_vdpa_cq_buf buf;
	/* Doorbell record: db[0] is the consumer index, db[1] the arm record. */
	struct mlx5_db db;
	/* Number of entries; get_sw_cqe() uses (cqe - 1) as an index mask, so
	 * this is expected to be a power of two.
	 */
	int cqe;
};
58 
/* One of the three umem buffers handed to the device per virtqueue. */
struct mlx5_vdpa_umem {
	/* NOTE(review): not touched in the visible part of the file. */
	struct mlx5_frag_buf_ctrl fbc;
	/* Memory backing the umem, allocated by umem_frag_buf_alloc(). */
	struct mlx5_frag_buf frag_buf;
	/* Size computed by umem_size() from the device capabilities. */
	int size;
	/* umem_id returned by the CREATE_UMEM command. */
	u32 id;
};
65 
/* One end of the QP pair that forms a virtqueue's notification channel. */
struct mlx5_vdpa_qp {
	/* Core QP object; qpn and uid are filled in by qp_create(). */
	struct mlx5_core_qp mqp;
	/* Receive queue buffer; only allocated when 'fw' is false. */
	struct mlx5_frag_buf frag_buf;
	/* Doorbell record; only allocated when 'fw' is false. */
	struct mlx5_db db;
	/* Receive head counter published to db.db[0] by rx_post(). */
	u16 head;
	/* True for the firmware-side QP, false for the driver-side QP. */
	bool fw;
};
73 
/* Copy of the per-virtqueue settings that also appear in
 * struct mlx5_vdpa_virtqueue.
 *
 * NOTE(review): presumably a snapshot used to re-create a virtqueue after its
 * resources are torn down; the code that fills and consumes it is not in this
 * part of the file - confirm.
 */
struct mlx5_vq_restore_info {
	u32 num_ent;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u16 avail_index;
	bool ready;
	struct vdpa_callback cb;
	/* NOTE(review): presumably marks the snapshot as valid - confirm. */
	bool restore;
};
84 
/* Driver state for a single virtqueue and the hardware objects backing it. */
struct mlx5_vdpa_virtqueue {
	bool ready;
	/* Ring addresses programmed into the device object by
	 * create_virtqueue(): descriptor table, used ring (device_addr) and
	 * available ring (driver_addr).
	 */
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	/* Queue size in entries. */
	u32 num_ent;
	/* Callback invoked after completions are handled. */
	struct vdpa_callback event_cb;

	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	bool initialized;
	/* Queue index; odd indices are treated as TX queues (see vq_is_tx()). */
	int index;
	/* Object id returned by the device when the virtqueue is created. */
	u32 virtq_id;
	/* Owning net device. */
	struct mlx5_vdpa_net *ndev;
	/* Value programmed as hw_available_index on creation. */
	u16 avail_idx;
	int fw_state;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};
119 
/* Upper bound on the number of virtqueues; it sizes the vqs[] array in
 * struct mlx5_vdpa_net.
 *
 * We will remove this limitation once mlx5_vdpa_alloc_resources()
 * provides for driver space allocation
 */
#define MLX5_MAX_SUPPORTED_VQS 16
124 
/* Per-device state of the mlx5 vDPA network device. */
struct mlx5_vdpa_net {
	/* Generic mlx5 vDPA device state (core device, uid, pd, uar, mkey). */
	struct mlx5_vdpa_dev mvdev;
	/* Transport objects (TIS etc.) owned by this device. */
	struct mlx5_vdpa_net_resources res;
	struct virtio_net_config config;
	/* Virtqueue state, indexed by queue index. */
	struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];

	/* Serialize vq resources creation and destruction. This is required
	 * since memory map might change and we need to destroy and create
	 * resources while the driver is operational.
	 */
	struct mutex reslock;
	struct mlx5_flow_table *rxft;
	struct mlx5_fc *rx_counter;
	struct mlx5_flow_handle *rx_rule;
	bool setup;
	u16 mtu;
};
142 
/* Forward declarations of helpers defined elsewhere in this file. */
static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_net *ndev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);

/* When true, print_status() and print_features() log every bit they are
 * given; when false they only warn about unknown bits.
 */
static bool mlx5_vdpa_debug;
149 
/* Log the name of @_feature if that bit is set in the 64-bit local variable
 * 'features'. Expects 'features' and 'mvdev' to be in scope at the call site.
 * BIT_ULL() is used because feature bit numbers go up to 63.
 */
#define MLX5_LOG_VIO_FLAG(_feature)                                                                \
	do {                                                                                       \
		if (features & BIT_ULL(_feature))                                                  \
			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
	} while (0)
155 
/* Log the name of @_status if that mask is set in the local variable 'status'.
 * Expects 'status' and 'mvdev' to be in scope at the call site.
 */
#define MLX5_LOG_VIO_STAT(_status)                                                                 \
	do {                                                                                       \
		if (status & (_status))                                                            \
			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
	} while (0)
161 
162 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
163 {
164 	if (status & ~VALID_STATUS_MASK)
165 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
166 			       status & ~VALID_STATUS_MASK);
167 
168 	if (!mlx5_vdpa_debug)
169 		return;
170 
171 	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
172 	if (set && !status) {
173 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
174 		return;
175 	}
176 
177 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
178 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
179 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
180 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
181 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
182 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
183 }
184 
185 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
186 {
187 	if (features & ~VALID_FEATURES_MASK)
188 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
189 			       features & ~VALID_FEATURES_MASK);
190 
191 	if (!mlx5_vdpa_debug)
192 		return;
193 
194 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
195 	if (!features)
196 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
197 
198 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
199 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
200 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
201 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
202 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
203 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
204 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
205 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
206 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
207 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
208 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
209 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
210 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
211 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
212 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
213 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
214 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
215 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
216 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
217 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
218 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
219 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
220 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
221 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
222 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
223 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
224 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
225 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
226 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
227 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
228 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
229 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
230 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
231 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
232 }
233 
234 static int create_tis(struct mlx5_vdpa_net *ndev)
235 {
236 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
237 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
238 	void *tisc;
239 	int err;
240 
241 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
242 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
243 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
244 	if (err)
245 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
246 
247 	return err;
248 }
249 
250 static void destroy_tis(struct mlx5_vdpa_net *ndev)
251 {
252 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
253 }
254 
/* Size in bytes of one completion queue entry, and its base-2 logarithm. */
#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
257 
258 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
259 {
260 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
261 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
262 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
263 	int err;
264 
265 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
266 				       ndev->mvdev.mdev->priv.numa_node);
267 	if (err)
268 		return err;
269 
270 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
271 
272 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
273 	buf->nent = nent;
274 
275 	return 0;
276 }
277 
278 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
279 {
280 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
281 
282 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
283 					ndev->mvdev.mdev->priv.numa_node);
284 }
285 
286 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
287 {
288 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
289 }
290 
291 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
292 {
293 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
294 }
295 
296 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
297 {
298 	struct mlx5_cqe64 *cqe64;
299 	void *cqe;
300 	int i;
301 
302 	for (i = 0; i < buf->nent; i++) {
303 		cqe = get_cqe(vcq, i);
304 		cqe64 = cqe;
305 		cqe64->op_own = MLX5_CQE_INVALID << 4;
306 	}
307 }
308 
309 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
310 {
311 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
312 
313 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
314 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
315 		return cqe64;
316 
317 	return NULL;
318 }
319 
320 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
321 {
322 	vqp->head += n;
323 	vqp->db.db[0] = cpu_to_be32(vqp->head);
324 }
325 
326 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
327 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
328 {
329 	struct mlx5_vdpa_qp *vqp;
330 	__be64 *pas;
331 	void *qpc;
332 
333 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
334 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
335 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
336 	if (vqp->fw) {
337 		/* Firmware QP is allocated by the driver for the firmware's
338 		 * use so we can skip part of the params as they will be chosen by firmware
339 		 */
340 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
341 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
342 		MLX5_SET(qpc, qpc, no_sq, 1);
343 		return;
344 	}
345 
346 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
347 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
348 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
349 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
350 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
351 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
352 	MLX5_SET(qpc, qpc, no_sq, 1);
353 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
354 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
355 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
356 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
357 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
358 }
359 
360 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
361 {
362 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
363 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
364 					ndev->mvdev.mdev->priv.numa_node);
365 }
366 
367 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
368 {
369 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
370 }
371 
372 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
373 		     struct mlx5_vdpa_qp *vqp)
374 {
375 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
376 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
377 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
378 	void *qpc;
379 	void *in;
380 	int err;
381 
382 	if (!vqp->fw) {
383 		vqp = &mvq->vqqp;
384 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
385 		if (err)
386 			return err;
387 
388 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
389 		if (err)
390 			goto err_db;
391 		inlen += vqp->frag_buf.npages * sizeof(__be64);
392 	}
393 
394 	in = kzalloc(inlen, GFP_KERNEL);
395 	if (!in) {
396 		err = -ENOMEM;
397 		goto err_kzalloc;
398 	}
399 
400 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
401 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
402 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
403 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
404 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
405 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
406 	if (!vqp->fw)
407 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
408 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
409 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
410 	kfree(in);
411 	if (err)
412 		goto err_kzalloc;
413 
414 	vqp->mqp.uid = ndev->mvdev.res.uid;
415 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
416 
417 	if (!vqp->fw)
418 		rx_post(vqp, mvq->num_ent);
419 
420 	return 0;
421 
422 err_kzalloc:
423 	if (!vqp->fw)
424 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
425 err_db:
426 	if (!vqp->fw)
427 		rq_buf_free(ndev, vqp);
428 
429 	return err;
430 }
431 
432 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
433 {
434 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
435 
436 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
437 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
438 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
439 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
440 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
441 	if (!vqp->fw) {
442 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
443 		rq_buf_free(ndev, vqp);
444 	}
445 }
446 
447 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
448 {
449 	return get_sw_cqe(cq, cq->mcq.cons_index);
450 }
451 
452 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
453 {
454 	struct mlx5_cqe64 *cqe64;
455 
456 	cqe64 = next_cqe_sw(vcq);
457 	if (!cqe64)
458 		return -EAGAIN;
459 
460 	vcq->mcq.cons_index++;
461 	return 0;
462 }
463 
464 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
465 {
466 	mlx5_cq_set_ci(&mvq->cq.mcq);
467 	rx_post(&mvq->vqqp, num);
468 	if (mvq->event_cb.callback)
469 		mvq->event_cb.callback(mvq->event_cb.private);
470 }
471 
472 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
473 {
474 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
475 	struct mlx5_vdpa_net *ndev = mvq->ndev;
476 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
477 	int num = 0;
478 
479 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
480 		num++;
481 		if (num > mvq->num_ent / 2) {
482 			/* If completions keep coming while we poll, we want to
483 			 * let the hardware know that we consumed them by
484 			 * updating the doorbell record.  We also let vdpa core
485 			 * know about this so it passes it on the virtio driver
486 			 * on the guest.
487 			 */
488 			mlx5_vdpa_handle_completions(mvq, num);
489 			num = 0;
490 		}
491 	}
492 
493 	if (num)
494 		mlx5_vdpa_handle_completions(mvq, num);
495 
496 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
497 }
498 
499 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
500 {
501 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
502 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
503 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
504 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
505 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
506 	unsigned int irqn;
507 	__be64 *pas;
508 	int inlen;
509 	void *cqc;
510 	void *in;
511 	int err;
512 	int eqn;
513 
514 	err = mlx5_db_alloc(mdev, &vcq->db);
515 	if (err)
516 		return err;
517 
518 	vcq->mcq.set_ci_db = vcq->db.db;
519 	vcq->mcq.arm_db = vcq->db.db + 1;
520 	vcq->mcq.cqe_sz = 64;
521 
522 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
523 	if (err)
524 		goto err_db;
525 
526 	cq_frag_buf_init(vcq, &vcq->buf);
527 
528 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
529 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
530 	in = kzalloc(inlen, GFP_KERNEL);
531 	if (!in) {
532 		err = -ENOMEM;
533 		goto err_vzalloc;
534 	}
535 
536 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
537 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
538 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
539 
540 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
541 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
542 
543 	/* Use vector 0 by default. Consider adding code to choose least used
544 	 * vector.
545 	 */
546 	err = mlx5_vector2eqn(mdev, 0, &eqn, &irqn);
547 	if (err)
548 		goto err_vec;
549 
550 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
551 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
552 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
553 	MLX5_SET(cqc, cqc, c_eqn, eqn);
554 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
555 
556 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
557 	if (err)
558 		goto err_vec;
559 
560 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
561 	vcq->cqe = num_ent;
562 	vcq->mcq.set_ci_db = vcq->db.db;
563 	vcq->mcq.arm_db = vcq->db.db + 1;
564 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
565 	kfree(in);
566 	return 0;
567 
568 err_vec:
569 	kfree(in);
570 err_vzalloc:
571 	cq_frag_buf_free(ndev, &vcq->buf);
572 err_db:
573 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
574 	return err;
575 }
576 
577 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
578 {
579 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
580 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
581 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
582 
583 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
584 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
585 		return;
586 	}
587 	cq_frag_buf_free(ndev, &vcq->buf);
588 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
589 }
590 
591 static int umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
592 		     struct mlx5_vdpa_umem **umemp)
593 {
594 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
595 	int p_a;
596 	int p_b;
597 
598 	switch (num) {
599 	case 1:
600 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
601 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
602 		*umemp = &mvq->umem1;
603 		break;
604 	case 2:
605 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
606 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
607 		*umemp = &mvq->umem2;
608 		break;
609 	case 3:
610 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
611 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
612 		*umemp = &mvq->umem3;
613 		break;
614 	}
615 	return p_a * mvq->num_ent + p_b;
616 }
617 
618 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
619 {
620 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
621 }
622 
623 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
624 {
625 	int inlen;
626 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
627 	void *um;
628 	void *in;
629 	int err;
630 	__be64 *pas;
631 	int size;
632 	struct mlx5_vdpa_umem *umem;
633 
634 	size = umem_size(ndev, mvq, num, &umem);
635 	if (size < 0)
636 		return size;
637 
638 	umem->size = size;
639 	err = umem_frag_buf_alloc(ndev, umem, size);
640 	if (err)
641 		return err;
642 
643 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
644 
645 	in = kzalloc(inlen, GFP_KERNEL);
646 	if (!in) {
647 		err = -ENOMEM;
648 		goto err_in;
649 	}
650 
651 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
652 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
653 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
654 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
655 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
656 
657 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
658 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
659 
660 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
661 	if (err) {
662 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
663 		goto err_cmd;
664 	}
665 
666 	kfree(in);
667 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
668 
669 	return 0;
670 
671 err_cmd:
672 	kfree(in);
673 err_in:
674 	umem_frag_buf_free(ndev, umem);
675 	return err;
676 }
677 
678 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
679 {
680 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
681 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
682 	struct mlx5_vdpa_umem *umem;
683 
684 	switch (num) {
685 	case 1:
686 		umem = &mvq->umem1;
687 		break;
688 	case 2:
689 		umem = &mvq->umem2;
690 		break;
691 	case 3:
692 		umem = &mvq->umem3;
693 		break;
694 	}
695 
696 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
697 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
698 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
699 		return;
700 
701 	umem_frag_buf_free(ndev, umem);
702 }
703 
/* Create umems 1..3 of @mvq in order. If one fails, the ones already created
 * are destroyed in reverse order and the error is returned.
 */
static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num = 1;
	int err = 0;

	while (num <= 3) {
		err = create_umem(ndev, mvq, num);
		if (err)
			break;
		num++;
	}

	if (!err)
		return 0;

	/* 'num' is the umem that failed; unwind the ones before it */
	while (--num > 0)
		umem_destroy(ndev, mvq, num);

	return err;
}
722 
/* Destroy the three umems of @mvq in reverse order of creation. */
static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	umem_destroy(ndev, mvq, 3);
	umem_destroy(ndev, mvq, 2);
	umem_destroy(ndev, mvq, 1);
}
730 
731 static int get_queue_type(struct mlx5_vdpa_net *ndev)
732 {
733 	u32 type_mask;
734 
735 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
736 
737 	/* prefer split queue */
738 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED)
739 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
740 
741 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT));
742 
743 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
744 }
745 
746 static bool vq_is_tx(u16 idx)
747 {
748 	return idx % 2;
749 }
750 
751 static u16 get_features_12_3(u64 features)
752 {
753 	return (!!(features & BIT(VIRTIO_NET_F_HOST_TSO4)) << 9) |
754 	       (!!(features & BIT(VIRTIO_NET_F_HOST_TSO6)) << 8) |
755 	       (!!(features & BIT(VIRTIO_NET_F_CSUM)) << 7) |
756 	       (!!(features & BIT(VIRTIO_NET_F_GUEST_CSUM)) << 6);
757 }
758 
759 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
760 {
761 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
762 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
763 	void *obj_context;
764 	void *cmd_hdr;
765 	void *vq_ctx;
766 	void *in;
767 	int err;
768 
769 	err = umems_create(ndev, mvq);
770 	if (err)
771 		return err;
772 
773 	in = kzalloc(inlen, GFP_KERNEL);
774 	if (!in) {
775 		err = -ENOMEM;
776 		goto err_alloc;
777 	}
778 
779 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
780 
781 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
782 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
783 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
784 
785 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
786 	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
787 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
788 		 get_features_12_3(ndev->mvdev.actual_features));
789 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
790 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
791 
792 	if (vq_is_tx(mvq->index))
793 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
794 
795 	MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
796 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
797 	MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
798 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
799 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
800 		 !!(ndev->mvdev.actual_features & VIRTIO_F_VERSION_1));
801 	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
802 	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
803 	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
804 	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key);
805 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
806 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
807 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
808 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem1.size);
809 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
810 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem1.size);
811 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
812 	if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
813 		MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);
814 
815 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
816 	if (err)
817 		goto err_cmd;
818 
819 	kfree(in);
820 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
821 
822 	return 0;
823 
824 err_cmd:
825 	kfree(in);
826 err_alloc:
827 	umems_destroy(ndev, mvq);
828 	return err;
829 }
830 
831 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
832 {
833 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
834 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
835 
836 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
837 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
838 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
839 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
840 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
841 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
842 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
843 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
844 		return;
845 	}
846 	umems_destroy(ndev, mvq);
847 }
848 
849 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
850 {
851 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
852 }
853 
854 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
855 {
856 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
857 }
858 
859 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
860 			int *outlen, u32 qpn, u32 rqpn)
861 {
862 	void *qpc;
863 	void *pp;
864 
865 	switch (cmd) {
866 	case MLX5_CMD_OP_2RST_QP:
867 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
868 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
869 		*in = kzalloc(*inlen, GFP_KERNEL);
870 		*out = kzalloc(*outlen, GFP_KERNEL);
871 		if (!*in || !*out)
872 			goto outerr;
873 
874 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
875 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
876 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
877 		break;
878 	case MLX5_CMD_OP_RST2INIT_QP:
879 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
880 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
881 		*in = kzalloc(*inlen, GFP_KERNEL);
882 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
883 		if (!*in || !*out)
884 			goto outerr;
885 
886 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
887 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
888 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
889 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
890 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
891 		MLX5_SET(qpc, qpc, rwe, 1);
892 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
893 		MLX5_SET(ads, pp, vhca_port_num, 1);
894 		break;
895 	case MLX5_CMD_OP_INIT2RTR_QP:
896 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
897 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
898 		*in = kzalloc(*inlen, GFP_KERNEL);
899 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
900 		if (!*in || !*out)
901 			goto outerr;
902 
903 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
904 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
905 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
906 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
907 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
908 		MLX5_SET(qpc, qpc, log_msg_max, 30);
909 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
910 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
911 		MLX5_SET(ads, pp, fl, 1);
912 		break;
913 	case MLX5_CMD_OP_RTR2RTS_QP:
914 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
915 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
916 		*in = kzalloc(*inlen, GFP_KERNEL);
917 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
918 		if (!*in || !*out)
919 			goto outerr;
920 
921 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
922 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
923 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
924 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
925 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
926 		MLX5_SET(ads, pp, ack_timeout, 14);
927 		MLX5_SET(qpc, qpc, retry_count, 7);
928 		MLX5_SET(qpc, qpc, rnr_retry, 7);
929 		break;
930 	default:
931 		goto outerr_nullify;
932 	}
933 
934 	return;
935 
936 outerr:
937 	kfree(*in);
938 	kfree(*out);
939 outerr_nullify:
940 	*in = NULL;
941 	*out = NULL;
942 }
943 
/* Release the mailbox pair obtained from alloc_inout(). */
static void free_inout(void *in, void *out)
{
	kfree(out);
	kfree(in);
}
949 
950 /* Two QPs are used by each virtqueue. One is used by the driver and one by
951  * firmware. The fw argument indicates whether the subjected QP is the one used
952  * by firmware.
953  */
954 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
955 {
956 	int outlen;
957 	int inlen;
958 	void *out;
959 	void *in;
960 	int err;
961 
962 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
963 	if (!in || !out)
964 		return -ENOMEM;
965 
966 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
967 	free_inout(in, out);
968 	return err;
969 }
970 
971 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
972 {
973 	int err;
974 
975 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
976 	if (err)
977 		return err;
978 
979 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
980 	if (err)
981 		return err;
982 
983 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
984 	if (err)
985 		return err;
986 
987 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
988 	if (err)
989 		return err;
990 
991 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
992 	if (err)
993 		return err;
994 
995 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
996 	if (err)
997 		return err;
998 
999 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1000 }
1001 
/* Virtqueue object attributes reported by query_virtqueue(). */
struct mlx5_virtq_attr {
	/* value of the object's "state" field */
	u8 state;
	/* value of the object's "hw_available_index" field */
	u16 available_index;
};
1006 
1007 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1008 			   struct mlx5_virtq_attr *attr)
1009 {
1010 	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1011 	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1012 	void *out;
1013 	void *obj_context;
1014 	void *cmd_hdr;
1015 	int err;
1016 
1017 	out = kzalloc(outlen, GFP_KERNEL);
1018 	if (!out)
1019 		return -ENOMEM;
1020 
1021 	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1022 
1023 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1024 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1025 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1026 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1027 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1028 	if (err)
1029 		goto err_cmd;
1030 
1031 	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1032 	memset(attr, 0, sizeof(*attr));
1033 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1034 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1035 	kfree(out);
1036 	return 0;
1037 
1038 err_cmd:
1039 	kfree(out);
1040 	return err;
1041 }
1042 
1043 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1044 {
1045 	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1046 	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1047 	void *obj_context;
1048 	void *cmd_hdr;
1049 	void *in;
1050 	int err;
1051 
1052 	in = kzalloc(inlen, GFP_KERNEL);
1053 	if (!in)
1054 		return -ENOMEM;
1055 
1056 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1057 
1058 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1059 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1060 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1061 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1062 
1063 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1064 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1065 		   MLX5_VIRTQ_MODIFY_MASK_STATE);
1066 	MLX5_SET(virtio_net_q_object, obj_context, state, state);
1067 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1068 	kfree(in);
1069 	if (!err)
1070 		mvq->fw_state = state;
1071 
1072 	return err;
1073 }
1074 
1075 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1076 {
1077 	u16 idx = mvq->index;
1078 	int err;
1079 
1080 	if (!mvq->num_ent)
1081 		return 0;
1082 
1083 	if (mvq->initialized) {
1084 		mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n");
1085 		return -EINVAL;
1086 	}
1087 
1088 	err = cq_create(ndev, idx, mvq->num_ent);
1089 	if (err)
1090 		return err;
1091 
1092 	err = qp_create(ndev, mvq, &mvq->fwqp);
1093 	if (err)
1094 		goto err_fwqp;
1095 
1096 	err = qp_create(ndev, mvq, &mvq->vqqp);
1097 	if (err)
1098 		goto err_vqqp;
1099 
1100 	err = connect_qps(ndev, mvq);
1101 	if (err)
1102 		goto err_connect;
1103 
1104 	err = create_virtqueue(ndev, mvq);
1105 	if (err)
1106 		goto err_connect;
1107 
1108 	if (mvq->ready) {
1109 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1110 		if (err) {
1111 			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1112 				       idx, err);
1113 			goto err_connect;
1114 		}
1115 	}
1116 
1117 	mvq->initialized = true;
1118 	return 0;
1119 
1120 err_connect:
1121 	qp_destroy(ndev, &mvq->vqqp);
1122 err_vqqp:
1123 	qp_destroy(ndev, &mvq->fwqp);
1124 err_fwqp:
1125 	cq_destroy(ndev, idx);
1126 	return err;
1127 }
1128 
1129 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1130 {
1131 	struct mlx5_virtq_attr attr;
1132 
1133 	if (!mvq->initialized)
1134 		return;
1135 
1136 	if (query_virtqueue(ndev, mvq, &attr)) {
1137 		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1138 		return;
1139 	}
1140 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1141 		return;
1142 
1143 	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1144 		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1145 }
1146 
1147 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1148 {
1149 	int i;
1150 
1151 	for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
1152 		suspend_vq(ndev, &ndev->vqs[i]);
1153 }
1154 
1155 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1156 {
1157 	if (!mvq->initialized)
1158 		return;
1159 
1160 	suspend_vq(ndev, mvq);
1161 	destroy_virtqueue(ndev, mvq);
1162 	qp_destroy(ndev, &mvq->vqqp);
1163 	qp_destroy(ndev, &mvq->fwqp);
1164 	cq_destroy(ndev, mvq->index);
1165 	mvq->initialized = false;
1166 }
1167 
1168 static int create_rqt(struct mlx5_vdpa_net *ndev)
1169 {
1170 	int log_max_rqt;
1171 	__be32 *list;
1172 	void *rqtc;
1173 	int inlen;
1174 	void *in;
1175 	int i, j;
1176 	int err;
1177 
1178 	log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
1179 	if (log_max_rqt < 1)
1180 		return -EOPNOTSUPP;
1181 
1182 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num);
1183 	in = kzalloc(inlen, GFP_KERNEL);
1184 	if (!in)
1185 		return -ENOMEM;
1186 
1187 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1188 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1189 
1190 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1191 	MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt);
1192 	MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
1193 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1194 	for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) {
1195 		if (!ndev->vqs[j].initialized)
1196 			continue;
1197 
1198 		if (!vq_is_tx(ndev->vqs[j].index)) {
1199 			list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
1200 			i++;
1201 		}
1202 	}
1203 
1204 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1205 	kfree(in);
1206 	if (err)
1207 		return err;
1208 
1209 	return 0;
1210 }
1211 
1212 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1213 {
1214 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1215 }
1216 
1217 static int create_tir(struct mlx5_vdpa_net *ndev)
1218 {
1219 #define HASH_IP_L4PORTS                                                                            \
1220 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1221 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1222 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1223 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1224 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1225 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1226 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1227 	void *rss_key;
1228 	void *outer;
1229 	void *tirc;
1230 	void *in;
1231 	int err;
1232 
1233 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1234 	if (!in)
1235 		return -ENOMEM;
1236 
1237 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1238 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1239 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1240 
1241 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1242 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1243 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1244 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1245 
1246 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1247 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1248 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1249 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1250 
1251 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1252 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1253 
1254 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1255 	kfree(in);
1256 	return err;
1257 }
1258 
1259 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1260 {
1261 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1262 }
1263 
1264 static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1265 {
1266 	struct mlx5_flow_destination dest[2] = {};
1267 	struct mlx5_flow_table_attr ft_attr = {};
1268 	struct mlx5_flow_act flow_act = {};
1269 	struct mlx5_flow_namespace *ns;
1270 	int err;
1271 
1272 	/* for now, one entry, match all, forward to tir */
1273 	ft_attr.max_fte = 1;
1274 	ft_attr.autogroup.max_num_groups = 1;
1275 
1276 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1277 	if (!ns) {
1278 		mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n");
1279 		return -EOPNOTSUPP;
1280 	}
1281 
1282 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1283 	if (IS_ERR(ndev->rxft))
1284 		return PTR_ERR(ndev->rxft);
1285 
1286 	ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1287 	if (IS_ERR(ndev->rx_counter)) {
1288 		err = PTR_ERR(ndev->rx_counter);
1289 		goto err_fc;
1290 	}
1291 
1292 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
1293 	dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1294 	dest[0].tir_num = ndev->res.tirn;
1295 	dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1296 	dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
1297 	ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2);
1298 	if (IS_ERR(ndev->rx_rule)) {
1299 		err = PTR_ERR(ndev->rx_rule);
1300 		ndev->rx_rule = NULL;
1301 		goto err_rule;
1302 	}
1303 
1304 	return 0;
1305 
1306 err_rule:
1307 	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1308 err_fc:
1309 	mlx5_destroy_flow_table(ndev->rxft);
1310 	return err;
1311 }
1312 
1313 static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1314 {
1315 	if (!ndev->rx_rule)
1316 		return;
1317 
1318 	mlx5_del_flow_rules(ndev->rx_rule);
1319 	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1320 	mlx5_destroy_flow_table(ndev->rxft);
1321 
1322 	ndev->rx_rule = NULL;
1323 }
1324 
1325 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
1326 {
1327 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1328 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1329 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1330 
1331 	if (unlikely(!mvq->ready))
1332 		return;
1333 
1334 	iowrite16(idx, ndev->mvdev.res.kick_addr);
1335 }
1336 
1337 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1338 				    u64 driver_area, u64 device_area)
1339 {
1340 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1341 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1342 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1343 
1344 	mvq->desc_addr = desc_area;
1345 	mvq->device_addr = device_area;
1346 	mvq->driver_addr = driver_area;
1347 	return 0;
1348 }
1349 
1350 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
1351 {
1352 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1353 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1354 	struct mlx5_vdpa_virtqueue *mvq;
1355 
1356 	mvq = &ndev->vqs[idx];
1357 	mvq->num_ent = num;
1358 }
1359 
1360 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
1361 {
1362 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1363 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1364 	struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx];
1365 
1366 	vq->event_cb = *cb;
1367 }
1368 
1369 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
1370 {
1371 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1372 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1373 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1374 
1375 	if (!ready)
1376 		suspend_vq(ndev, mvq);
1377 
1378 	mvq->ready = ready;
1379 }
1380 
1381 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
1382 {
1383 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1384 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1385 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1386 
1387 	return mvq->ready;
1388 }
1389 
1390 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
1391 				  const struct vdpa_vq_state *state)
1392 {
1393 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1394 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1395 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1396 
1397 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
1398 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
1399 		return -EINVAL;
1400 	}
1401 
1402 	mvq->avail_idx = state->avail_index;
1403 	return 0;
1404 }
1405 
1406 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
1407 {
1408 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1409 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1410 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1411 	struct mlx5_virtq_attr attr;
1412 	int err;
1413 
1414 	if (!mvq->initialized)
1415 		return -EAGAIN;
1416 
1417 	err = query_virtqueue(ndev, mvq, &attr);
1418 	if (err) {
1419 		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
1420 		return err;
1421 	}
1422 	state->avail_index = attr.available_index;
1423 	return 0;
1424 }
1425 
1426 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
1427 {
1428 	return PAGE_SIZE;
1429 }
1430 
/* Bit masks tested by mlx_to_vritio_features() against the 16-bit device
 * feature mask; each one is translated there to the virtio feature bit of the
 * same name.
 */
enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
	MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
	MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
	MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
};
1436 
1437 static u64 mlx_to_vritio_features(u16 dev_features)
1438 {
1439 	u64 result = 0;
1440 
1441 	if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
1442 		result |= BIT(VIRTIO_NET_F_GUEST_CSUM);
1443 	if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
1444 		result |= BIT(VIRTIO_NET_F_CSUM);
1445 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
1446 		result |= BIT(VIRTIO_NET_F_HOST_TSO6);
1447 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
1448 		result |= BIT(VIRTIO_NET_F_HOST_TSO4);
1449 
1450 	return result;
1451 }
1452 
1453 static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
1454 {
1455 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1456 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1457 	u16 dev_features;
1458 
1459 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
1460 	ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features);
1461 	if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
1462 		ndev->mvdev.mlx_features |= BIT(VIRTIO_F_VERSION_1);
1463 	ndev->mvdev.mlx_features |= BIT(VIRTIO_F_ACCESS_PLATFORM);
1464 	print_features(mvdev, ndev->mvdev.mlx_features, false);
1465 	return ndev->mvdev.mlx_features;
1466 }
1467 
1468 static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
1469 {
1470 	if (!(features & BIT(VIRTIO_F_ACCESS_PLATFORM)))
1471 		return -EOPNOTSUPP;
1472 
1473 	return 0;
1474 }
1475 
1476 static int setup_virtqueues(struct mlx5_vdpa_net *ndev)
1477 {
1478 	int err;
1479 	int i;
1480 
1481 	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) {
1482 		err = setup_vq(ndev, &ndev->vqs[i]);
1483 		if (err)
1484 			goto err_vq;
1485 	}
1486 
1487 	return 0;
1488 
1489 err_vq:
1490 	for (--i; i >= 0; i--)
1491 		teardown_vq(ndev, &ndev->vqs[i]);
1492 
1493 	return err;
1494 }
1495 
1496 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
1497 {
1498 	struct mlx5_vdpa_virtqueue *mvq;
1499 	int i;
1500 
1501 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
1502 		mvq = &ndev->vqs[i];
1503 		if (!mvq->initialized)
1504 			continue;
1505 
1506 		teardown_vq(ndev, mvq);
1507 	}
1508 }
1509 
1510 /* TODO: cross-endian support */
1511 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
1512 {
1513 	return virtio_legacy_is_little_endian() ||
1514 		(mvdev->actual_features & (1ULL << VIRTIO_F_VERSION_1));
1515 }
1516 
1517 static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
1518 {
1519 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1520 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1521 	int err;
1522 
1523 	print_features(mvdev, features, true);
1524 
1525 	err = verify_min_features(mvdev, features);
1526 	if (err)
1527 		return err;
1528 
1529 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
1530 	ndev->config.mtu = __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev),
1531 					     ndev->mtu);
1532 	return err;
1533 }
1534 
/* vdpa set_config_cb op: not implemented; @cb is ignored and a warning is
 * logged.
 */
static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	mlx5_vdpa_warn(mvdev, "set config callback not supported\n");
}
1540 
1541 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
1542 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
1543 {
1544 	return MLX5_VDPA_MAX_VQ_ENTRIES;
1545 }
1546 
1547 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
1548 {
1549 	return VIRTIO_ID_NET;
1550 }
1551 
1552 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
1553 {
1554 	return PCI_VENDOR_ID_MELLANOX;
1555 }
1556 
1557 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
1558 {
1559 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1560 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1561 
1562 	print_status(mvdev, ndev->mvdev.status, false);
1563 	return ndev->mvdev.status;
1564 }
1565 
1566 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1567 {
1568 	struct mlx5_vq_restore_info *ri = &mvq->ri;
1569 	struct mlx5_virtq_attr attr;
1570 	int err;
1571 
1572 	if (!mvq->initialized)
1573 		return 0;
1574 
1575 	err = query_virtqueue(ndev, mvq, &attr);
1576 	if (err)
1577 		return err;
1578 
1579 	ri->avail_index = attr.available_index;
1580 	ri->ready = mvq->ready;
1581 	ri->num_ent = mvq->num_ent;
1582 	ri->desc_addr = mvq->desc_addr;
1583 	ri->device_addr = mvq->device_addr;
1584 	ri->driver_addr = mvq->driver_addr;
1585 	ri->cb = mvq->event_cb;
1586 	ri->restore = true;
1587 	return 0;
1588 }
1589 
1590 static int save_channels_info(struct mlx5_vdpa_net *ndev)
1591 {
1592 	int i;
1593 
1594 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
1595 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
1596 		save_channel_info(ndev, &ndev->vqs[i]);
1597 	}
1598 	return 0;
1599 }
1600 
1601 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
1602 {
1603 	int i;
1604 
1605 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1606 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1607 }
1608 
1609 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
1610 {
1611 	struct mlx5_vdpa_virtqueue *mvq;
1612 	struct mlx5_vq_restore_info *ri;
1613 	int i;
1614 
1615 	mlx5_clear_vqs(ndev);
1616 	init_mvqs(ndev);
1617 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
1618 		mvq = &ndev->vqs[i];
1619 		ri = &mvq->ri;
1620 		if (!ri->restore)
1621 			continue;
1622 
1623 		mvq->avail_idx = ri->avail_index;
1624 		mvq->ready = ri->ready;
1625 		mvq->num_ent = ri->num_ent;
1626 		mvq->desc_addr = ri->desc_addr;
1627 		mvq->device_addr = ri->device_addr;
1628 		mvq->driver_addr = ri->driver_addr;
1629 		mvq->event_cb = ri->cb;
1630 	}
1631 }
1632 
1633 static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb)
1634 {
1635 	int err;
1636 
1637 	suspend_vqs(ndev);
1638 	err = save_channels_info(ndev);
1639 	if (err)
1640 		goto err_mr;
1641 
1642 	teardown_driver(ndev);
1643 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
1644 	err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb);
1645 	if (err)
1646 		goto err_mr;
1647 
1648 	restore_channels_info(ndev);
1649 	err = setup_driver(ndev);
1650 	if (err)
1651 		goto err_setup;
1652 
1653 	return 0;
1654 
1655 err_setup:
1656 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
1657 err_mr:
1658 	return err;
1659 }
1660 
1661 static int setup_driver(struct mlx5_vdpa_net *ndev)
1662 {
1663 	int err;
1664 
1665 	mutex_lock(&ndev->reslock);
1666 	if (ndev->setup) {
1667 		mlx5_vdpa_warn(&ndev->mvdev, "setup driver called for already setup driver\n");
1668 		err = 0;
1669 		goto out;
1670 	}
1671 	err = setup_virtqueues(ndev);
1672 	if (err) {
1673 		mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues\n");
1674 		goto out;
1675 	}
1676 
1677 	err = create_rqt(ndev);
1678 	if (err) {
1679 		mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n");
1680 		goto err_rqt;
1681 	}
1682 
1683 	err = create_tir(ndev);
1684 	if (err) {
1685 		mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n");
1686 		goto err_tir;
1687 	}
1688 
1689 	err = add_fwd_to_tir(ndev);
1690 	if (err) {
1691 		mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir\n");
1692 		goto err_fwd;
1693 	}
1694 	ndev->setup = true;
1695 	mutex_unlock(&ndev->reslock);
1696 
1697 	return 0;
1698 
1699 err_fwd:
1700 	destroy_tir(ndev);
1701 err_tir:
1702 	destroy_rqt(ndev);
1703 err_rqt:
1704 	teardown_virtqueues(ndev);
1705 out:
1706 	mutex_unlock(&ndev->reslock);
1707 	return err;
1708 }
1709 
1710 static void teardown_driver(struct mlx5_vdpa_net *ndev)
1711 {
1712 	mutex_lock(&ndev->reslock);
1713 	if (!ndev->setup)
1714 		goto out;
1715 
1716 	remove_fwd_to_tir(ndev);
1717 	destroy_tir(ndev);
1718 	destroy_rqt(ndev);
1719 	teardown_virtqueues(ndev);
1720 	ndev->setup = false;
1721 out:
1722 	mutex_unlock(&ndev->reslock);
1723 }
1724 
1725 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
1726 {
1727 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1728 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1729 	int err;
1730 
1731 	print_status(mvdev, status, true);
1732 	if (!status) {
1733 		mlx5_vdpa_info(mvdev, "performing device reset\n");
1734 		teardown_driver(ndev);
1735 		mlx5_vdpa_destroy_mr(&ndev->mvdev);
1736 		ndev->mvdev.status = 0;
1737 		ndev->mvdev.mlx_features = 0;
1738 		++mvdev->generation;
1739 		return;
1740 	}
1741 
1742 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
1743 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
1744 			err = setup_driver(ndev);
1745 			if (err) {
1746 				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
1747 				goto err_setup;
1748 			}
1749 		} else {
1750 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
1751 			return;
1752 		}
1753 	}
1754 
1755 	ndev->mvdev.status = status;
1756 	return;
1757 
1758 err_setup:
1759 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
1760 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
1761 }
1762 
1763 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
1764 				 unsigned int len)
1765 {
1766 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1767 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1768 
1769 	if (offset + len < sizeof(struct virtio_net_config))
1770 		memcpy(buf, (u8 *)&ndev->config + offset, len);
1771 }
1772 
/* vdpa set_config op: config space writes are not supported, so the request
 * is silently ignored.
 */
static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
				 unsigned int len)
{
}
1778 
1779 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
1780 {
1781 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1782 
1783 	return mvdev->generation;
1784 }
1785 
1786 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
1787 {
1788 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1789 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1790 	bool change_map;
1791 	int err;
1792 
1793 	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
1794 	if (err) {
1795 		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
1796 		return err;
1797 	}
1798 
1799 	if (change_map)
1800 		return mlx5_vdpa_change_map(ndev, iotlb);
1801 
1802 	return 0;
1803 }
1804 
1805 static void mlx5_vdpa_free(struct vdpa_device *vdev)
1806 {
1807 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1808 	struct mlx5_vdpa_net *ndev;
1809 
1810 	ndev = to_mlx5_vdpa_ndev(mvdev);
1811 
1812 	free_resources(ndev);
1813 	mlx5_vdpa_free_resources(&ndev->mvdev);
1814 	mutex_destroy(&ndev->reslock);
1815 }
1816 
1817 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
1818 {
1819 	struct vdpa_notification_area ret = {};
1820 
1821 	return ret;
1822 }
1823 
1824 static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
1825 {
1826 	return -EOPNOTSUPP;
1827 }
1828 
/* vdpa config operations of this driver; passed to vdpa_alloc_device() in
 * mlx5_vdpa_add_dev().
 */
static const struct vdpa_config_ops mlx5_vdpa_ops = {
	/* per-virtqueue operations */
	.set_vq_address = mlx5_vdpa_set_vq_address,
	.set_vq_num = mlx5_vdpa_set_vq_num,
	.kick_vq = mlx5_vdpa_kick_vq,
	.set_vq_cb = mlx5_vdpa_set_vq_cb,
	.set_vq_ready = mlx5_vdpa_set_vq_ready,
	.get_vq_ready = mlx5_vdpa_get_vq_ready,
	.set_vq_state = mlx5_vdpa_set_vq_state,
	.get_vq_state = mlx5_vdpa_get_vq_state,
	.get_vq_notification = mlx5_get_vq_notification,
	.get_vq_irq = mlx5_get_vq_irq,
	/* device-wide operations */
	.get_vq_align = mlx5_vdpa_get_vq_align,
	.get_features = mlx5_vdpa_get_features,
	.set_features = mlx5_vdpa_set_features,
	.set_config_cb = mlx5_vdpa_set_config_cb,
	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
	.get_device_id = mlx5_vdpa_get_device_id,
	.get_vendor_id = mlx5_vdpa_get_vendor_id,
	.get_status = mlx5_vdpa_get_status,
	.set_status = mlx5_vdpa_set_status,
	.get_config = mlx5_vdpa_get_config,
	.set_config = mlx5_vdpa_set_config,
	.get_generation = mlx5_vdpa_get_generation,
	.set_map = mlx5_vdpa_set_map,
	.free = mlx5_vdpa_free,
};
1855 
1856 static int alloc_resources(struct mlx5_vdpa_net *ndev)
1857 {
1858 	struct mlx5_vdpa_net_resources *res = &ndev->res;
1859 	int err;
1860 
1861 	if (res->valid) {
1862 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
1863 		return -EEXIST;
1864 	}
1865 
1866 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
1867 	if (err)
1868 		return err;
1869 
1870 	err = create_tis(ndev);
1871 	if (err)
1872 		goto err_tis;
1873 
1874 	res->valid = true;
1875 
1876 	return 0;
1877 
1878 err_tis:
1879 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
1880 	return err;
1881 }
1882 
1883 static void free_resources(struct mlx5_vdpa_net *ndev)
1884 {
1885 	struct mlx5_vdpa_net_resources *res = &ndev->res;
1886 
1887 	if (!res->valid)
1888 		return;
1889 
1890 	destroy_tis(ndev);
1891 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
1892 	res->valid = false;
1893 }
1894 
1895 static void init_mvqs(struct mlx5_vdpa_net *ndev)
1896 {
1897 	struct mlx5_vdpa_virtqueue *mvq;
1898 	int i;
1899 
1900 	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
1901 		mvq = &ndev->vqs[i];
1902 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1903 		mvq->index = i;
1904 		mvq->ndev = ndev;
1905 		mvq->fwqp.fw = true;
1906 	}
1907 	for (; i < ndev->mvdev.max_vqs; i++) {
1908 		mvq = &ndev->vqs[i];
1909 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1910 		mvq->index = i;
1911 		mvq->ndev = ndev;
1912 	}
1913 }
1914 
1915 void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev)
1916 {
1917 	struct virtio_net_config *config;
1918 	struct mlx5_vdpa_dev *mvdev;
1919 	struct mlx5_vdpa_net *ndev;
1920 	u32 max_vqs;
1921 	int err;
1922 
1923 	/* we save one virtqueue for control virtqueue should we require it */
1924 	max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
1925 	max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
1926 
1927 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
1928 				 2 * mlx5_vdpa_max_qps(max_vqs));
1929 	if (IS_ERR(ndev))
1930 		return ndev;
1931 
1932 	ndev->mvdev.max_vqs = max_vqs;
1933 	mvdev = &ndev->mvdev;
1934 	mvdev->mdev = mdev;
1935 	init_mvqs(ndev);
1936 	mutex_init(&ndev->reslock);
1937 	config = &ndev->config;
1938 	err = mlx5_query_nic_vport_mtu(mdev, &ndev->mtu);
1939 	if (err)
1940 		goto err_mtu;
1941 
1942 	err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
1943 	if (err)
1944 		goto err_mtu;
1945 
1946 	mvdev->vdev.dma_dev = mdev->device;
1947 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
1948 	if (err)
1949 		goto err_mtu;
1950 
1951 	err = alloc_resources(ndev);
1952 	if (err)
1953 		goto err_res;
1954 
1955 	err = vdpa_register_device(&mvdev->vdev);
1956 	if (err)
1957 		goto err_reg;
1958 
1959 	return ndev;
1960 
1961 err_reg:
1962 	free_resources(ndev);
1963 err_res:
1964 	mlx5_vdpa_free_resources(&ndev->mvdev);
1965 err_mtu:
1966 	mutex_destroy(&ndev->reslock);
1967 	put_device(&mvdev->vdev.dev);
1968 	return ERR_PTR(err);
1969 }
1970 
1971 void mlx5_vdpa_remove_dev(struct mlx5_vdpa_dev *mvdev)
1972 {
1973 	vdpa_unregister_device(&mvdev->vdev);
1974 }
1975