xref: /openbmc/linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision abe9af53)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/vdpa.h>
5 #include <uapi/linux/virtio_ids.h>
6 #include <linux/virtio_config.h>
7 #include <linux/mlx5/qp.h>
8 #include <linux/mlx5/device.h>
9 #include <linux/mlx5/vport.h>
10 #include <linux/mlx5/fs.h>
11 #include <linux/mlx5/device.h>
12 #include "mlx5_vnet.h"
13 #include "mlx5_vdpa_ifc.h"
14 #include "mlx5_vdpa.h"
15 
/* Resolve the embedding struct mlx5_vdpa_dev from a struct vdpa_device. */
#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)

/* Every virtio-net and transport feature bit this driver recognizes; bits
 * outside this mask trigger a warning in print_features().
 */
#define VALID_FEATURES_MASK                                                                        \
	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

/* Every virtio device status bit this driver recognizes; bits outside this
 * mask trigger a warning in print_status().
 */
#define VALID_STATUS_MASK                                                                          \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
36 
/* Device-level networking objects shared by all virtqueues of the device. */
struct mlx5_vdpa_net_resources {
	u32 tisn;	/* transport interface send object number */
	u32 tdn;	/* transport domain number */
	u32 tirn;	/* transport interface receive object number */
	u32 rqtn;	/* receive queue table number */
	bool valid;	/* true once all of the above were created */
};
44 
/* Fragmented buffer holding the CQEs of a completion queue. */
struct mlx5_vdpa_cq_buf {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int cqe_size;	/* size of a single CQE in bytes */
	int nent;	/* number of CQEs in the buffer */
};
51 
/* Completion queue used for the virtqueue notification channel. */
struct mlx5_vdpa_cq {
	struct mlx5_core_cq mcq;
	struct mlx5_vdpa_cq_buf buf;
	struct mlx5_db db;	/* doorbell record (consumer index/arm) */
	int cqe;		/* number of entries; used for index wrap */
};
58 
/* Driver-provided user memory region required by the virtqueue object. */
struct mlx5_vdpa_umem {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int size;	/* size in bytes, computed from device capabilities */
	u32 id;		/* umem object id returned by firmware */
};
65 
/* One end of the RC connection used as the notification channel. */
struct mlx5_vdpa_qp {
	struct mlx5_core_qp mqp;
	struct mlx5_frag_buf frag_buf;	/* RQ buffer; driver-owned QP only */
	struct mlx5_db db;		/* doorbell record; driver-owned QP only */
	u16 head;			/* RQ producer index */
	bool fw;			/* true if this is the firmware-owned QP */
};
73 
/* Snapshot of virtqueue state used to re-create it after teardown (e.g. on
 * a memory map change).
 */
struct mlx5_vq_restore_info {
	u32 num_ent;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u16 avail_index;
	bool ready;
	struct vdpa_callback cb;
	bool restore;	/* true when this snapshot should be applied */
};
84 
/* Per-virtqueue state: guest-visible configuration plus the hardware
 * objects backing the queue.
 */
struct mlx5_vdpa_virtqueue {
	bool ready;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u32 num_ent;
	struct vdpa_callback event_cb;

	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	bool initialized;
	int index;
	u32 virtq_id;	/* VIRTIO_NET_Q general object id */
	struct mlx5_vdpa_net *ndev;
	u16 avail_idx;
	int fw_state;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};
119 
/* We will remove this limitation once mlx5_vdpa_alloc_resources()
 * provides for driver space allocation
 */
#define MLX5_MAX_SUPPORTED_VQS 16
124 
/* Top-level state of a vdpa net device instance. */
struct mlx5_vdpa_net {
	struct mlx5_vdpa_dev mvdev;	/* must be first; see to_mvdev() */
	struct mlx5_vdpa_net_resources res;
	struct virtio_net_config config;
	struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];

	/* Serialize vq resources creation and destruction. This is required
	 * since memory map might change and we need to destroy and create
	 * resources while driver in operational.
	 */
	struct mutex reslock;
	struct mlx5_flow_table *rxft;
	struct mlx5_fc *rx_counter;
	struct mlx5_flow_handle *rx_rule;
	bool setup;	/* true while driver resources are instantiated */
	u16 mtu;
};
142 
143 static void free_resources(struct mlx5_vdpa_net *ndev);
144 static void init_mvqs(struct mlx5_vdpa_net *ndev);
145 static int setup_driver(struct mlx5_vdpa_net *ndev);
146 static void teardown_driver(struct mlx5_vdpa_net *ndev);
147 
148 static bool mlx5_vdpa_debug;
149 
/* Log the name of a feature bit if it is set in the local 'features'. */
#define MLX5_LOG_VIO_FLAG(_feature)                                                                \
	do {                                                                                       \
		if (features & BIT_ULL(_feature))                                                  \
			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
	} while (0)

/* Log the name of a status bit if it is set in the local 'status'. */
#define MLX5_LOG_VIO_STAT(_status)                                                                 \
	do {                                                                                       \
		if (status & (_status))                                                            \
			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
	} while (0)
161 
162 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
163 {
164 	if (status & ~VALID_STATUS_MASK)
165 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
166 			       status & ~VALID_STATUS_MASK);
167 
168 	if (!mlx5_vdpa_debug)
169 		return;
170 
171 	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
172 	if (set && !status) {
173 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
174 		return;
175 	}
176 
177 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
178 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
179 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
180 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
181 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
182 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
183 }
184 
/* Log feature bits for debugging. Bits outside VALID_FEATURES_MASK are
 * warned about unconditionally; per-bit logging requires mlx5_vdpa_debug.
 * @set: true when the driver is writing features, false on a read.
 */
static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
	if (features & ~VALID_FEATURES_MASK)
		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
			       features & ~VALID_FEATURES_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
	if (!features)
		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");

	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
}
233 
234 static int create_tis(struct mlx5_vdpa_net *ndev)
235 {
236 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
237 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
238 	void *tisc;
239 	int err;
240 
241 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
242 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
243 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
244 	if (err)
245 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
246 
247 	return err;
248 }
249 
/* Destroy the TIS object created by create_tis(). */
static void destroy_tis(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
}
254 
/* CQE size used for vdpa completion queues (64-byte CQEs). */
#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
257 
258 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
259 {
260 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
261 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
262 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
263 	int err;
264 
265 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
266 				       ndev->mvdev.mdev->priv.numa_node);
267 	if (err)
268 		return err;
269 
270 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
271 
272 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
273 	buf->nent = nent;
274 
275 	return 0;
276 }
277 
/* Allocate the backing fragmented buffer for a umem of 'size' bytes on the
 * device's NUMA node.
 */
static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
{
	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;

	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}
285 
/* Release the CQE buffer allocated by cq_frag_buf_alloc(). */
static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
}
290 
/* Return a pointer to CQE number n within the CQ's fragmented buffer. */
static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
{
	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
}
295 
296 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
297 {
298 	struct mlx5_cqe64 *cqe64;
299 	void *cqe;
300 	int i;
301 
302 	for (i = 0; i < buf->nent; i++) {
303 		cqe = get_cqe(vcq, i);
304 		cqe64 = cqe;
305 		cqe64->op_own = MLX5_CQE_INVALID << 4;
306 	}
307 }
308 
/* Return the CQE at consumer index n if it is owned by software, NULL
 * otherwise. Ownership requires a valid opcode and an owner bit matching
 * the parity of the index wraparound (n & cq->cqe).
 */
static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
		return cqe64;

	return NULL;
}
319 
/* Advance the RQ head by n entries and publish it through the doorbell
 * record so the device may post further completions.
 */
static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
	vqp->head += n;
	vqp->db.db[0] = cpu_to_be32(vqp->head);
}
325 
/* Fill the create_qp command input for either end of the virtqueue's RC
 * connection. For the firmware-owned QP most QPC parameters are left for
 * the firmware to choose; the driver-owned QP gets a full receive-only QPC.
 * NOTE(review): the branch tests vqp->fw rather than the fw argument; the
 * two are presumably kept consistent by the caller - confirm.
 */
static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
{
	struct mlx5_vdpa_qp *vqp;
	__be64 *pas;
	void *qpc;

	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	if (vqp->fw) {
		/* Firmware QP is allocated by the driver for the firmware's
		 * use so we can skip part of the params as they will be chosen by firmware
		 */
		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
		MLX5_SET(qpc, qpc, no_sq, 1);
		return;
	}

	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, no_sq, 1);
	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
}
359 
/* Allocate the receive queue buffer (num_ent data segments) on the device's
 * NUMA node.
 */
static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
{
	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}
366 
/* Release the receive queue buffer allocated by rq_buf_alloc(). */
static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}
371 
/* Create one QP of the virtqueue's notification channel. The driver-owned
 * QP (!vqp->fw) also needs an RQ buffer and a doorbell record, which are
 * allocated here and freed on the error paths below.
 */
static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
		     struct mlx5_vdpa_qp *vqp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	void *qpc;
	void *in;
	int err;

	if (!vqp->fw) {
		/* re-derives the driver QP pointer; for !fw callers this is
		 * the same object that was passed in
		 */
		vqp = &mvq->vqqp;
		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
		if (err)
			return err;

		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
		if (err)
			goto err_db;
		/* command input grows by one PAS entry per buffer page */
		inlen += vqp->frag_buf.npages * sizeof(__be64);
	}

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_kzalloc;
	}

	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	if (!vqp->fw)
		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (err)
		goto err_kzalloc;

	vqp->mqp.uid = ndev->mvdev.res.uid;
	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

	if (!vqp->fw)
		rx_post(vqp, mvq->num_ent);

	return 0;

	/* unwind: err_kzalloc frees the doorbell then falls through to free
	 * the RQ buffer; err_db frees only the RQ buffer
	 */
err_kzalloc:
	if (!vqp->fw)
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
	if (!vqp->fw)
		rq_buf_free(ndev, vqp);

	return err;
}
431 
/* Destroy a QP created by qp_create(); for the driver-owned QP also release
 * the doorbell record and RQ buffer.
 */
static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
	if (!vqp->fw) {
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
		rq_buf_free(ndev, vqp);
	}
}
446 
/* Return the next software-owned CQE at the current consumer index, or NULL. */
static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}
451 
/* Consume one CQE if available. Returns 0 on success, -EAGAIN when the CQ
 * is empty. The CQE content itself is not inspected; only its arrival matters.
 */
static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	struct mlx5_cqe64 *cqe64;

	cqe64 = next_cqe_sw(vcq);
	if (!cqe64)
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}
463 
/* Acknowledge num consumed completions: update the CQ consumer index,
 * replenish the RQ, and forward the event to the vdpa callback if set.
 */
static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	mlx5_cq_set_ci(&mvq->cq.mcq);
	rx_post(&mvq->vqqp, num);
	if (mvq->event_cb.callback)
		mvq->event_cb.callback(mvq->event_cb.private);
}
471 
/* CQ completion handler: drain all available CQEs, periodically flushing in
 * batches of num_ent/2, then re-arm the CQ for the next interrupt.
 */
static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record.  We also let vdpa core
			 * know about this so it passes it on the virtio driver
			 * on the guest.
			 */
			mlx5_vdpa_handle_completions(mvq, num);
			num = 0;
		}
	}

	if (num)
		mlx5_vdpa_handle_completions(mvq, num);

	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}
498 
/* Create the completion queue for virtqueue idx: doorbell record, CQE
 * buffer, and the firmware CQ object; the CQ is armed before returning.
 */
static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vdpa_cq *vcq = &mvq->cq;
	unsigned int irqn;
	__be64 *pas;
	int inlen;
	void *cqc;
	void *in;
	int err;
	int eqn;

	err = mlx5_db_alloc(mdev, &vcq->db);
	if (err)
		return err;

	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	vcq->mcq.cqe_sz = 64;

	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
	if (err)
		goto err_db;

	cq_frag_buf_init(vcq, &vcq->buf);

	/* command input grows by one PAS entry per CQE-buffer page */
	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_vzalloc;
	}

	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	/* Use vector 0 by default. Consider adding code to choose least used
	 * vector.
	 */
	err = mlx5_vector2eqn(mdev, 0, &eqn, &irqn);
	if (err)
		goto err_vec;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);

	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_vec;

	vcq->mcq.comp = mlx5_vdpa_cq_comp;
	vcq->cqe = num_ent;
	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
	kfree(in);
	return 0;

err_vec:
	kfree(in);
err_vzalloc:
	cq_frag_buf_free(ndev, &vcq->buf);
err_db:
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
	return err;
}
576 
/* Destroy the CQ for virtqueue idx. If the firmware destroy fails, the
 * buffers are deliberately leaked rather than freed under hardware use.
 */
static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	struct mlx5_vdpa_cq *vcq = &mvq->cq;

	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
		return;
	}
	cq_frag_buf_free(ndev, &vcq->buf);
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}
590 
591 static int umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
592 		     struct mlx5_vdpa_umem **umemp)
593 {
594 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
595 	int p_a;
596 	int p_b;
597 
598 	switch (num) {
599 	case 1:
600 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
601 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
602 		*umemp = &mvq->umem1;
603 		break;
604 	case 2:
605 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
606 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
607 		*umemp = &mvq->umem2;
608 		break;
609 	case 3:
610 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
611 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
612 		*umemp = &mvq->umem3;
613 		break;
614 	}
615 	return p_a * mvq->num_ent + p_b;
616 }
617 
/* Release the umem backing buffer allocated by umem_frag_buf_alloc(). */
static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
}
622 
/* Allocate umem 'num' (1..3) for the virtqueue and register it with the
 * firmware; on success umem->id holds the firmware object id.
 */
static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	int inlen;
	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
	void *um;
	void *in;
	int err;
	__be64 *pas;
	int size;
	struct mlx5_vdpa_umem *umem;

	size = umem_size(ndev, mvq, num, &umem);
	if (size < 0)
		return size;

	umem->size = size;
	err = umem_frag_buf_alloc(ndev, umem, size);
	if (err)
		return err;

	/* command input grows by one MTT entry per buffer page */
	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
	um = MLX5_ADDR_OF(create_umem_in, in, umem);
	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);

	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
		goto err_cmd;
	}

	kfree(in);
	umem->id = MLX5_GET(create_umem_out, out, umem_id);

	return 0;

err_cmd:
	kfree(in);
err_in:
	umem_frag_buf_free(ndev, umem);
	return err;
}
677 
678 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
679 {
680 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
681 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
682 	struct mlx5_vdpa_umem *umem;
683 
684 	switch (num) {
685 	case 1:
686 		umem = &mvq->umem1;
687 		break;
688 	case 2:
689 		umem = &mvq->umem2;
690 		break;
691 	case 3:
692 		umem = &mvq->umem3;
693 		break;
694 	}
695 
696 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
697 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
698 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
699 		return;
700 
701 	umem_frag_buf_free(ndev, umem);
702 }
703 
/* Create the three umems the virtqueue object requires. On failure, any
 * umems already created are destroyed before returning the error.
 */
static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int created;
	int ret = 0;

	for (created = 0; created < 3; created++) {
		ret = create_umem(ndev, mvq, created + 1);
		if (ret)
			break;
	}
	if (!ret)
		return 0;

	while (created > 0) {
		umem_destroy(ndev, mvq, created);
		created--;
	}

	return ret;
}
722 
/* Destroy all three virtqueue umems in reverse creation order. */
static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int i;

	for (i = 3; i >= 1; i--)
		umem_destroy(ndev, mvq, i);
}
730 
731 static int get_queue_type(struct mlx5_vdpa_net *ndev)
732 {
733 	u32 type_mask;
734 
735 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
736 
737 	/* prefer split queue */
738 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED)
739 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
740 
741 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT));
742 
743 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
744 }
745 
746 static bool vq_is_tx(u16 idx)
747 {
748 	return idx % 2;
749 }
750 
751 static u16 get_features_12_3(u64 features)
752 {
753 	return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
754 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
755 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
756 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
757 }
758 
759 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
760 {
761 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
762 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
763 	void *obj_context;
764 	void *cmd_hdr;
765 	void *vq_ctx;
766 	void *in;
767 	int err;
768 
769 	err = umems_create(ndev, mvq);
770 	if (err)
771 		return err;
772 
773 	in = kzalloc(inlen, GFP_KERNEL);
774 	if (!in) {
775 		err = -ENOMEM;
776 		goto err_alloc;
777 	}
778 
779 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
780 
781 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
782 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
783 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
784 
785 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
786 	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
787 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
788 		 get_features_12_3(ndev->mvdev.actual_features));
789 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
790 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
791 
792 	if (vq_is_tx(mvq->index))
793 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
794 
795 	MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
796 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
797 	MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
798 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
799 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
800 		 !!(ndev->mvdev.actual_features & VIRTIO_F_VERSION_1));
801 	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
802 	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
803 	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
804 	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key);
805 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
806 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
807 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
808 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem1.size);
809 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
810 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem1.size);
811 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
812 	if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
813 		MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);
814 
815 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
816 	if (err)
817 		goto err_cmd;
818 
819 	kfree(in);
820 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
821 
822 	return 0;
823 
824 err_cmd:
825 	kfree(in);
826 err_alloc:
827 	umems_destroy(ndev, mvq);
828 	return err;
829 }
830 
/* Destroy the VIRTIO_NET_Q object. If the firmware command fails, the umems
 * are deliberately left allocated rather than freed under possible use.
 */
static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};

	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
		return;
	}
	umems_destroy(ndev, mvq);
}
848 
/* Return the remote QP number for a connection: the peer of the QP selected
 * by 'fw'.
 */
static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
}
853 
/* Return the QP number of the firmware-owned or driver-owned QP. */
static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
}
858 
/* Allocate and fill the command in/out buffers for a QP state-transition
 * command. On any failure (or unknown cmd) both pointers are set to NULL;
 * the caller detects that and returns -ENOMEM. kfree(NULL) is a no-op, so
 * the partial-allocation path is safe.
 * NOTE(review): the INIT2RTR and RTR2RTS cases take the qpc address via
 * rst2init_qp_in; presumably the qpc offset is identical in all these
 * command layouts - confirm against mlx5_ifc.h.
 */
static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
			int *outlen, u32 qpn, u32 rqpn)
{
	void *qpc;
	void *pp;

	switch (cmd) {
	case MLX5_CMD_OP_2RST_QP:
		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(*outlen, GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
		break;
	case MLX5_CMD_OP_RST2INIT_QP:
		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		MLX5_SET(qpc, qpc, rwe, 1);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, vhca_port_num, 1);
		break;
	case MLX5_CMD_OP_INIT2RTR_QP:
		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
		MLX5_SET(qpc, qpc, log_msg_max, 30);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, fl, 1);
		break;
	case MLX5_CMD_OP_RTR2RTS_QP:
		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, ack_timeout, 14);
		MLX5_SET(qpc, qpc, retry_count, 7);
		MLX5_SET(qpc, qpc, rnr_retry, 7);
		break;
	default:
		goto outerr_nullify;
	}

	return;

outerr:
	kfree(*in);
	kfree(*out);
outerr_nullify:
	*in = NULL;
	*out = NULL;
}
943 
/* Free the command buffers allocated by alloc_inout(). */
static void free_inout(void *in, void *out)
{
	kfree(in);
	kfree(out);
}
949 
/* Two QPs are used by each virtqueue. One is used by the driver and one by
 * firmware. The fw argument indicates whether the subjected QP is the one used
 * by firmware.
 */
/* Execute one QP state-transition command (cmd) on the selected QP.
 * Returns 0 or a negative errno (-ENOMEM if buffer allocation failed).
 */
static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
{
	int outlen;
	int inlen;
	void *out;
	void *in;
	int err;

	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
	if (!in || !out)
		return -ENOMEM;

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
	free_inout(in, out);
	return err;
}
970 
971 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
972 {
973 	int err;
974 
975 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
976 	if (err)
977 		return err;
978 
979 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
980 	if (err)
981 		return err;
982 
983 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
984 	if (err)
985 		return err;
986 
987 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
988 	if (err)
989 		return err;
990 
991 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
992 	if (err)
993 		return err;
994 
995 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
996 	if (err)
997 		return err;
998 
999 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1000 }
1001 
/* Snapshot of the firmware virtqueue object returned by query_virtqueue(). */
struct mlx5_virtq_attr {
	u8 state;		/* object state field */
	u16 available_index;	/* hw_available_index field */
};
1006 
1007 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1008 			   struct mlx5_virtq_attr *attr)
1009 {
1010 	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1011 	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1012 	void *out;
1013 	void *obj_context;
1014 	void *cmd_hdr;
1015 	int err;
1016 
1017 	out = kzalloc(outlen, GFP_KERNEL);
1018 	if (!out)
1019 		return -ENOMEM;
1020 
1021 	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1022 
1023 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1024 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1025 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1026 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1027 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1028 	if (err)
1029 		goto err_cmd;
1030 
1031 	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1032 	memset(attr, 0, sizeof(*attr));
1033 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1034 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1035 	kfree(out);
1036 	return 0;
1037 
1038 err_cmd:
1039 	kfree(out);
1040 	return err;
1041 }
1042 
/* Move the firmware virtqueue object to @state via MODIFY_GENERAL_OBJECT.
 * On success the cached mvq->fw_state is updated to match the new state.
 */
static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *in;
	int err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
	/* Only the state field is selected for modification. */
	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
		   MLX5_VIRTQ_MODIFY_MASK_STATE);
	MLX5_SET(virtio_net_q_object, obj_context, state, state);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (!err)
		mvq->fw_state = state;

	return err;
}
1074 
/* Bring one virtqueue fully up: create its CQ and both QPs, connect the
 * QPs, create the firmware virtqueue object, and move it to RDY if the
 * queue was already marked ready. All resources are unwound in reverse
 * order on failure.
 */
static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u16 idx = mvq->index;
	int err;

	/* A queue with no configured entries is left untouched. */
	if (!mvq->num_ent)
		return 0;

	if (mvq->initialized) {
		mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n");
		return -EINVAL;
	}

	err = cq_create(ndev, idx, mvq->num_ent);
	if (err)
		return err;

	err = qp_create(ndev, mvq, &mvq->fwqp);
	if (err)
		goto err_fwqp;

	err = qp_create(ndev, mvq, &mvq->vqqp);
	if (err)
		goto err_vqqp;

	err = connect_qps(ndev, mvq);
	if (err)
		goto err_connect;

	err = create_virtqueue(ndev, mvq);
	if (err)
		goto err_connect;

	if (mvq->ready) {
		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
		if (err) {
			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
				       idx, err);
			/* NOTE(review): on this path the virtqueue object
			 * created above is not destroyed before unwinding the
			 * QPs — verify whether destroy_virtqueue() is needed
			 * here.
			 */
			goto err_connect;
		}
	}

	mvq->initialized = true;
	return 0;

err_connect:
	qp_destroy(ndev, &mvq->vqqp);
err_vqqp:
	qp_destroy(ndev, &mvq->fwqp);
err_fwqp:
	cq_destroy(ndev, idx);
	return err;
}
1128 
/* Move an initialized, ready virtqueue object to SUSPEND and cache its
 * hardware available index in mvq->avail_idx so it can be reported after
 * the object is torn down. Failures are logged, not returned.
 */
static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_virtq_attr attr;

	if (!mvq->initialized)
		return;

	/* Only a queue currently in the RDY state needs suspending. */
	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
		return;

	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");

	if (query_virtqueue(ndev, mvq, &attr)) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
		return;
	}
	mvq->avail_idx = attr.available_index;
}
1148 
1149 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1150 {
1151 	int i;
1152 
1153 	for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
1154 		suspend_vq(ndev, &ndev->vqs[i]);
1155 }
1156 
/* Undo setup_vq(): suspend the queue, then destroy the virtqueue object,
 * both QPs and the CQ in reverse creation order. No-op for queues that
 * were never initialized.
 */
static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	if (!mvq->initialized)
		return;

	suspend_vq(ndev, mvq);
	destroy_virtqueue(ndev, mvq);
	qp_destroy(ndev, &mvq->vqqp);
	qp_destroy(ndev, &mvq->fwqp);
	cq_destroy(ndev, mvq->index);
	mvq->initialized = false;
}
1169 
1170 static int create_rqt(struct mlx5_vdpa_net *ndev)
1171 {
1172 	int log_max_rqt;
1173 	__be32 *list;
1174 	void *rqtc;
1175 	int inlen;
1176 	void *in;
1177 	int i, j;
1178 	int err;
1179 
1180 	log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
1181 	if (log_max_rqt < 1)
1182 		return -EOPNOTSUPP;
1183 
1184 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num);
1185 	in = kzalloc(inlen, GFP_KERNEL);
1186 	if (!in)
1187 		return -ENOMEM;
1188 
1189 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1190 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1191 
1192 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1193 	MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt);
1194 	MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
1195 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1196 	for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) {
1197 		if (!ndev->vqs[j].initialized)
1198 			continue;
1199 
1200 		if (!vq_is_tx(ndev->vqs[j].index)) {
1201 			list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
1202 			i++;
1203 		}
1204 	}
1205 
1206 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1207 	kfree(in);
1208 	if (err)
1209 		return err;
1210 
1211 	return 0;
1212 }
1213 
/* Destroy the RQ table created by create_rqt(). */
static void destroy_rqt(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
}
1218 
/* Create an indirect TIR that spreads RX traffic over the RQ table using
 * a symmetric Toeplitz hash of the outer IPv4 TCP 4-tuple. Returns 0 on
 * success or a negative errno.
 */
static int create_tir(struct mlx5_vdpa_net *ndev)
{
#define HASH_IP_L4PORTS                                                                            \
	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
	 MLX5_HASH_FIELD_SEL_L4_DPORT)
	/* Fixed Toeplitz hash key. */
	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
	void *rss_key;
	void *outer;
	void *tirc;
	void *in;
	int err;

	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);

	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));

	/* Hash on the outer headers: IPv4 src/dst and TCP ports. */
	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);

	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);

	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
	kfree(in);
	return err;
}
1260 
/* Destroy the TIR created by create_tir(). */
static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}
1265 
/* Install a single catch-all flow rule that forwards all RX traffic to
 * the TIR and counts it. Creates the flow table, a flow counter and the
 * rule; each is unwound in reverse order on failure.
 */
static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_flow_destination dest[2] = {};
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_namespace *ns;
	int err;

	/* for now, one entry, match all, forward to tir */
	ft_attr.max_fte = 1;
	ft_attr.autogroup.max_num_groups = 1;

	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
	if (!ns) {
		mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n");
		return -EOPNOTSUPP;
	}

	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(ndev->rxft))
		return PTR_ERR(ndev->rxft);

	ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
	if (IS_ERR(ndev->rx_counter)) {
		err = PTR_ERR(ndev->rx_counter);
		goto err_fc;
	}

	/* Two destinations: forward to the TIR and hit the counter. */
	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
	dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	dest[0].tir_num = ndev->res.tirn;
	dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
	/* NULL spec: match all packets. */
	ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2);
	if (IS_ERR(ndev->rx_rule)) {
		err = PTR_ERR(ndev->rx_rule);
		ndev->rx_rule = NULL;
		goto err_rule;
	}

	return 0;

err_rule:
	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
err_fc:
	mlx5_destroy_flow_table(ndev->rxft);
	return err;
}
1314 
/* Remove the catch-all rule, its counter and the flow table installed by
 * add_fwd_to_tir(). Safe to call when the rule was never installed.
 */
static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
{
	if (!ndev->rx_rule)
		return;

	mlx5_del_flow_rules(ndev->rx_rule);
	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
	mlx5_destroy_flow_table(ndev->rxft);

	ndev->rx_rule = NULL;
}
1326 
1327 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
1328 {
1329 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1330 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1331 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1332 
1333 	if (unlikely(!mvq->ready))
1334 		return;
1335 
1336 	iowrite16(idx, ndev->mvdev.res.kick_addr);
1337 }
1338 
1339 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1340 				    u64 driver_area, u64 device_area)
1341 {
1342 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1343 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1344 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1345 
1346 	mvq->desc_addr = desc_area;
1347 	mvq->device_addr = device_area;
1348 	mvq->driver_addr = driver_area;
1349 	return 0;
1350 }
1351 
1352 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
1353 {
1354 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1355 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1356 	struct mlx5_vdpa_virtqueue *mvq;
1357 
1358 	mvq = &ndev->vqs[idx];
1359 	mvq->num_ent = num;
1360 }
1361 
1362 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
1363 {
1364 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1365 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1366 	struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx];
1367 
1368 	vq->event_cb = *cb;
1369 }
1370 
1371 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
1372 {
1373 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1374 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1375 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1376 
1377 	if (!ready)
1378 		suspend_vq(ndev, mvq);
1379 
1380 	mvq->ready = ready;
1381 }
1382 
1383 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
1384 {
1385 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1386 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1387 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1388 
1389 	return mvq->ready;
1390 }
1391 
1392 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
1393 				  const struct vdpa_vq_state *state)
1394 {
1395 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1396 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1397 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1398 
1399 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
1400 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
1401 		return -EINVAL;
1402 	}
1403 
1404 	mvq->avail_idx = state->avail_index;
1405 	return 0;
1406 }
1407 
/* Report the current available index of queue @idx, either from the live
 * firmware object or from the value cached at suspend time.
 */
static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_virtq_attr attr;
	int err;

	/* If the virtq object was destroyed, use the value saved at
	 * the last minute of suspend_vq. This caters for userspace
	 * that cares about emulating the index after vq is stopped.
	 */
	if (!mvq->initialized) {
		state->avail_index = mvq->avail_idx;
		return 0;
	}

	err = query_virtqueue(ndev, mvq, &attr);
	if (err) {
		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
		return err;
	}
	state->avail_index = attr.available_index;
	return 0;
}
1433 
/* Report PAGE_SIZE as the required virtqueue alignment. */
static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
{
	return PAGE_SIZE;
}
1438 
/* Feature bits as encoded in the device_features_bits_mask capability
 * field; translated to virtio feature bits by mlx_to_vritio_features().
 */
enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
	MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
	MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
	MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
};
1444 
1445 static u64 mlx_to_vritio_features(u16 dev_features)
1446 {
1447 	u64 result = 0;
1448 
1449 	if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
1450 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
1451 	if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
1452 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
1453 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
1454 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
1455 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
1456 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
1457 
1458 	return result;
1459 }
1460 
/* Build and cache the device feature set: the emulation capability bits
 * translated to virtio bits, VERSION_1 when the device supports it, and
 * ACCESS_PLATFORM which is always offered.
 */
static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	u16 dev_features;

	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
	ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features);
	if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
	print_features(mvdev, ndev->mvdev.mlx_features, false);
	return ndev->mvdev.mlx_features;
}
1475 
1476 static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
1477 {
1478 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1479 		return -EOPNOTSUPP;
1480 
1481 	return 0;
1482 }
1483 
1484 static int setup_virtqueues(struct mlx5_vdpa_net *ndev)
1485 {
1486 	int err;
1487 	int i;
1488 
1489 	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) {
1490 		err = setup_vq(ndev, &ndev->vqs[i]);
1491 		if (err)
1492 			goto err_vq;
1493 	}
1494 
1495 	return 0;
1496 
1497 err_vq:
1498 	for (--i; i >= 0; i--)
1499 		teardown_vq(ndev, &ndev->vqs[i]);
1500 
1501 	return err;
1502 }
1503 
1504 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
1505 {
1506 	struct mlx5_vdpa_virtqueue *mvq;
1507 	int i;
1508 
1509 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
1510 		mvq = &ndev->vqs[i];
1511 		if (!mvq->initialized)
1512 			continue;
1513 
1514 		teardown_vq(ndev, mvq);
1515 	}
1516 }
1517 
/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
	/* Little-endian when legacy virtio already is, or when the guest
	 * negotiated VERSION_1.
	 */
	return virtio_legacy_is_little_endian() ||
		(mvdev->actual_features & (1ULL << VIRTIO_F_VERSION_1));
}
1524 
/* Convert a CPU-endian u16 to the negotiated virtio endianness. */
static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}
1529 
1530 static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
1531 {
1532 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1533 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1534 	int err;
1535 
1536 	print_features(mvdev, features, true);
1537 
1538 	err = verify_min_features(mvdev, features);
1539 	if (err)
1540 		return err;
1541 
1542 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
1543 	ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
1544 	ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
1545 	return err;
1546 }
1547 
/* Config-change notifications are not implemented; log and ignore. */
static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
{
	/* not implemented */
	mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n");
}
1553 
/* Maximum number of entries supported per virtqueue. */
#define MLX5_VDPA_MAX_VQ_ENTRIES 256
static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
{
	return MLX5_VDPA_MAX_VQ_ENTRIES;
}
1559 
/* virtio device type: network device. */
static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
{
	return VIRTIO_ID_NET;
}
1564 
/* Vendor ID reported to the vdpa core: Mellanox PCI vendor ID. */
static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
{
	return PCI_VENDOR_ID_MELLANOX;
}
1569 
/* Return the cached virtio status byte. */
static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	print_status(mvdev, ndev->mvdev.status, false);
	return ndev->mvdev.status;
}
1578 
1579 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1580 {
1581 	struct mlx5_vq_restore_info *ri = &mvq->ri;
1582 	struct mlx5_virtq_attr attr;
1583 	int err;
1584 
1585 	if (!mvq->initialized)
1586 		return 0;
1587 
1588 	err = query_virtqueue(ndev, mvq, &attr);
1589 	if (err)
1590 		return err;
1591 
1592 	ri->avail_index = attr.available_index;
1593 	ri->ready = mvq->ready;
1594 	ri->num_ent = mvq->num_ent;
1595 	ri->desc_addr = mvq->desc_addr;
1596 	ri->device_addr = mvq->device_addr;
1597 	ri->driver_addr = mvq->driver_addr;
1598 	ri->cb = mvq->event_cb;
1599 	ri->restore = true;
1600 	return 0;
1601 }
1602 
1603 static int save_channels_info(struct mlx5_vdpa_net *ndev)
1604 {
1605 	int i;
1606 
1607 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
1608 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
1609 		save_channel_info(ndev, &ndev->vqs[i]);
1610 	}
1611 	return 0;
1612 }
1613 
static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
{
	int i;

	/* Zero each queue up to, but not including, its restore info (ri)
	 * so previously saved channel state survives the clear.
	 */
	for (i = 0; i < ndev->mvdev.max_vqs; i++)
		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
}
1621 
1622 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
1623 {
1624 	struct mlx5_vdpa_virtqueue *mvq;
1625 	struct mlx5_vq_restore_info *ri;
1626 	int i;
1627 
1628 	mlx5_clear_vqs(ndev);
1629 	init_mvqs(ndev);
1630 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
1631 		mvq = &ndev->vqs[i];
1632 		ri = &mvq->ri;
1633 		if (!ri->restore)
1634 			continue;
1635 
1636 		mvq->avail_idx = ri->avail_index;
1637 		mvq->ready = ri->ready;
1638 		mvq->num_ent = ri->num_ent;
1639 		mvq->desc_addr = ri->desc_addr;
1640 		mvq->device_addr = ri->device_addr;
1641 		mvq->driver_addr = ri->driver_addr;
1642 		mvq->event_cb = ri->cb;
1643 	}
1644 }
1645 
/* Re-create the memory key for a new IOTLB mapping. Live queues are
 * suspended and their state saved first; if the guest driver already set
 * DRIVER_OK, the whole data path is rebuilt on top of the new mapping.
 */
static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb)
{
	int err;

	suspend_vqs(ndev);
	err = save_channels_info(ndev);
	if (err)
		goto err_mr;

	teardown_driver(ndev);
	mlx5_vdpa_destroy_mr(&ndev->mvdev);
	err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb);
	if (err)
		goto err_mr;

	/* Nothing more to rebuild until the guest driver reaches DRIVER_OK. */
	if (!(ndev->mvdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
		return 0;

	restore_channels_info(ndev);
	err = setup_driver(ndev);
	if (err)
		goto err_setup;

	return 0;

err_setup:
	mlx5_vdpa_destroy_mr(&ndev->mvdev);
err_mr:
	return err;
}
1676 
/* Build the whole data path under reslock: virtqueues, RQ table, TIR and
 * the forwarding flow rule. Idempotent: a second call while already set
 * up just warns and returns 0. Unwinds in reverse order on failure.
 */
static int setup_driver(struct mlx5_vdpa_net *ndev)
{
	int err;

	mutex_lock(&ndev->reslock);
	if (ndev->setup) {
		mlx5_vdpa_warn(&ndev->mvdev, "setup driver called for already setup driver\n");
		err = 0;
		goto out;
	}
	err = setup_virtqueues(ndev);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues\n");
		goto out;
	}

	err = create_rqt(ndev);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n");
		goto err_rqt;
	}

	err = create_tir(ndev);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n");
		goto err_tir;
	}

	err = add_fwd_to_tir(ndev);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir\n");
		goto err_fwd;
	}
	ndev->setup = true;
	mutex_unlock(&ndev->reslock);

	return 0;

err_fwd:
	destroy_tir(ndev);
err_tir:
	destroy_rqt(ndev);
err_rqt:
	teardown_virtqueues(ndev);
out:
	mutex_unlock(&ndev->reslock);
	return err;
}
1725 
/* Tear down the data path built by setup_driver(), in reverse creation
 * order, under reslock. No-op if nothing was set up.
 */
static void teardown_driver(struct mlx5_vdpa_net *ndev)
{
	mutex_lock(&ndev->reslock);
	if (!ndev->setup)
		goto out;

	remove_fwd_to_tir(ndev);
	destroy_tir(ndev);
	destroy_rqt(ndev);
	teardown_virtqueues(ndev);
	ndev->setup = false;
out:
	mutex_unlock(&ndev->reslock);
}
1740 
/* Handle a virtio status write. Writing zero performs a device reset
 * (teardown + MR destruction and a generation bump); setting DRIVER_OK
 * builds the data path. Clearing DRIVER_OK without a full reset is not
 * supported. On setup failure the FAILED bit is latched in the status.
 */
static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	print_status(mvdev, status, true);
	if (!status) {
		mlx5_vdpa_info(mvdev, "performing device reset\n");
		teardown_driver(ndev);
		mlx5_vdpa_destroy_mr(&ndev->mvdev);
		ndev->mvdev.status = 0;
		ndev->mvdev.mlx_features = 0;
		++mvdev->generation;
		return;
	}

	/* Act only on a DRIVER_OK transition. */
	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
			err = setup_driver(ndev);
			if (err) {
				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
				goto err_setup;
			}
		} else {
			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
			return;
		}
	}

	ndev->mvdev.status = status;
	return;

err_setup:
	mlx5_vdpa_destroy_mr(&ndev->mvdev);
	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
}
1778 
1779 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
1780 				 unsigned int len)
1781 {
1782 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1783 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1784 
1785 	if (offset + len < sizeof(struct virtio_net_config))
1786 		memcpy(buf, (u8 *)&ndev->config + offset, len);
1787 }
1788 
/* Config space writes are not supported; silently ignore them. */
static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
				 unsigned int len)
{
	/* not supported */
}
1794 
1795 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
1796 {
1797 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1798 
1799 	return mvdev->generation;
1800 }
1801 
1802 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
1803 {
1804 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1805 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1806 	bool change_map;
1807 	int err;
1808 
1809 	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
1810 	if (err) {
1811 		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
1812 		return err;
1813 	}
1814 
1815 	if (change_map)
1816 		return mlx5_vdpa_change_map(ndev, iotlb);
1817 
1818 	return 0;
1819 }
1820 
1821 static void mlx5_vdpa_free(struct vdpa_device *vdev)
1822 {
1823 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1824 	struct mlx5_vdpa_net *ndev;
1825 
1826 	ndev = to_mlx5_vdpa_ndev(mvdev);
1827 
1828 	free_resources(ndev);
1829 	mlx5_vdpa_free_resources(&ndev->mvdev);
1830 	mutex_destroy(&ndev->reslock);
1831 }
1832 
/* No per-queue notification area is exposed; return an empty one. */
static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
{
	struct vdpa_notification_area ret = {};

	return ret;
}
1839 
/* Per-virtqueue interrupts are not supported. */
static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
{
	return -EOPNOTSUPP;
}
1844 
/* vdpa_config_ops implementation for the mlx5 virtio-net device. */
static const struct vdpa_config_ops mlx5_vdpa_ops = {
	.set_vq_address = mlx5_vdpa_set_vq_address,
	.set_vq_num = mlx5_vdpa_set_vq_num,
	.kick_vq = mlx5_vdpa_kick_vq,
	.set_vq_cb = mlx5_vdpa_set_vq_cb,
	.set_vq_ready = mlx5_vdpa_set_vq_ready,
	.get_vq_ready = mlx5_vdpa_get_vq_ready,
	.set_vq_state = mlx5_vdpa_set_vq_state,
	.get_vq_state = mlx5_vdpa_get_vq_state,
	.get_vq_notification = mlx5_get_vq_notification,
	.get_vq_irq = mlx5_get_vq_irq,
	.get_vq_align = mlx5_vdpa_get_vq_align,
	.get_features = mlx5_vdpa_get_features,
	.set_features = mlx5_vdpa_set_features,
	.set_config_cb = mlx5_vdpa_set_config_cb,
	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
	.get_device_id = mlx5_vdpa_get_device_id,
	.get_vendor_id = mlx5_vdpa_get_vendor_id,
	.get_status = mlx5_vdpa_get_status,
	.set_status = mlx5_vdpa_set_status,
	.get_config = mlx5_vdpa_get_config,
	.set_config = mlx5_vdpa_set_config,
	.get_generation = mlx5_vdpa_get_generation,
	.set_map = mlx5_vdpa_set_map,
	.free = mlx5_vdpa_free,
};
1871 
/* Allocate the per-device networking resources: a transport domain and a
 * TIS. Returns -EEXIST when called while resources are already valid.
 */
static int alloc_resources(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_net_resources *res = &ndev->res;
	int err;

	if (res->valid) {
		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
		return -EEXIST;
	}

	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
	if (err)
		return err;

	err = create_tis(ndev);
	if (err)
		goto err_tis;

	res->valid = true;

	return 0;

err_tis:
	/* Undo the transport domain allocation on TIS creation failure. */
	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
	return err;
}
1898 
1899 static void free_resources(struct mlx5_vdpa_net *ndev)
1900 {
1901 	struct mlx5_vdpa_net_resources *res = &ndev->res;
1902 
1903 	if (!res->valid)
1904 		return;
1905 
1906 	destroy_tis(ndev);
1907 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
1908 	res->valid = false;
1909 }
1910 
1911 static void init_mvqs(struct mlx5_vdpa_net *ndev)
1912 {
1913 	struct mlx5_vdpa_virtqueue *mvq;
1914 	int i;
1915 
1916 	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
1917 		mvq = &ndev->vqs[i];
1918 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1919 		mvq->index = i;
1920 		mvq->ndev = ndev;
1921 		mvq->fwqp.fw = true;
1922 	}
1923 	for (; i < ndev->mvdev.max_vqs; i++) {
1924 		mvq = &ndev->vqs[i];
1925 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1926 		mvq->index = i;
1927 		mvq->ndev = ndev;
1928 	}
1929 }
1930 
/* Create and register an mlx5 vdpa net device on top of @mdev. Queries
 * the MTU and MAC into the virtio config space, allocates core and net
 * resources, and registers with the vdpa core. Returns the new device or
 * an ERR_PTR; all partial allocations are unwound on failure.
 */
void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev)
{
	struct virtio_net_config *config;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_vdpa_net *ndev;
	u32 max_vqs;
	int err;

	/* we save one virtqueue for control virtqueue should we require it */
	max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
	max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);

	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
				 2 * mlx5_vdpa_max_qps(max_vqs));
	if (IS_ERR(ndev))
		return ndev;

	ndev->mvdev.max_vqs = max_vqs;
	mvdev = &ndev->mvdev;
	mvdev->mdev = mdev;
	init_mvqs(ndev);
	mutex_init(&ndev->reslock);
	config = &ndev->config;
	err = mlx5_query_nic_vport_mtu(mdev, &ndev->mtu);
	if (err)
		goto err_mtu;

	err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
	if (err)
		goto err_mtu;

	mvdev->vdev.dma_dev = mdev->device;
	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
	if (err)
		goto err_mtu;

	err = alloc_resources(ndev);
	if (err)
		goto err_res;

	err = vdpa_register_device(&mvdev->vdev);
	if (err)
		goto err_reg;

	return ndev;

err_reg:
	free_resources(ndev);
err_res:
	mlx5_vdpa_free_resources(&ndev->mvdev);
err_mtu:
	mutex_destroy(&ndev->reslock);
	/* Dropping the last reference frees the vdpa device allocation. */
	put_device(&mvdev->vdev.dev);
	return ERR_PTR(err);
}
1986 
/* Unregister from the vdpa core; mlx5_vdpa_free() is the .free callback
 * that performs the remaining cleanup.
 */
void mlx5_vdpa_remove_dev(struct mlx5_vdpa_dev *mvdev)
{
	vdpa_unregister_device(&mvdev->vdev);
}
1991