1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */ 3 4 #include <linux/module.h> 5 #include <linux/vdpa.h> 6 #include <linux/vringh.h> 7 #include <uapi/linux/virtio_net.h> 8 #include <uapi/linux/virtio_ids.h> 9 #include <uapi/linux/vdpa.h> 10 #include <linux/virtio_config.h> 11 #include <linux/auxiliary_bus.h> 12 #include <linux/mlx5/cq.h> 13 #include <linux/mlx5/qp.h> 14 #include <linux/mlx5/device.h> 15 #include <linux/mlx5/driver.h> 16 #include <linux/mlx5/vport.h> 17 #include <linux/mlx5/fs.h> 18 #include <linux/mlx5/mlx5_ifc_vdpa.h> 19 #include <linux/mlx5/mpfs.h> 20 #include "mlx5_vdpa.h" 21 22 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); 23 MODULE_DESCRIPTION("Mellanox VDPA driver"); 24 MODULE_LICENSE("Dual BSD/GPL"); 25 26 #define to_mlx5_vdpa_ndev(__mvdev) \ 27 container_of(__mvdev, struct mlx5_vdpa_net, mvdev) 28 #define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev) 29 30 #define VALID_FEATURES_MASK \ 31 (BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \ 32 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \ 33 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \ 34 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \ 35 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \ 36 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \ 37 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \ 38 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \ 39 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \ 40 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \ 41 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \ 42 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \ 43 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV)) 44 45 #define VALID_STATUS_MASK \ 46 (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \ 47 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED) 48 49 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature))) 50 51 #define MLX5V_UNTAGGED 0x1000 52 53 struct mlx5_vdpa_net_resources { 54 u32 tisn; 55 u32 tdn; 56 u32 tirn; 57 u32 rqtn; 58 bool valid; 59 }; 60 61 struct mlx5_vdpa_cq_buf { 62 struct mlx5_frag_buf_ctrl fbc; 63 struct mlx5_frag_buf frag_buf; 64 int cqe_size; 65 int nent; 66 }; 67 68 struct mlx5_vdpa_cq { 69 struct mlx5_core_cq mcq; 70 struct mlx5_vdpa_cq_buf buf; 71 struct mlx5_db db; 72 int cqe; 73 }; 74 75 struct mlx5_vdpa_umem { 76 struct mlx5_frag_buf_ctrl fbc; 77 struct mlx5_frag_buf frag_buf; 78 int size; 79 u32 id; 80 }; 81 82 struct mlx5_vdpa_qp { 83 struct mlx5_core_qp mqp; 84 struct mlx5_frag_buf frag_buf; 85 struct mlx5_db db; 86 u16 head; 87 bool fw; 88 }; 89 90 struct mlx5_vq_restore_info { 91 u32 num_ent; 92 u64 desc_addr; 93 u64 device_addr; 94 u64 driver_addr; 95 u16 avail_index; 96 u16 used_index; 97 bool ready; 98 bool restore; 99 }; 100 101 struct mlx5_vdpa_virtqueue { 102 bool ready; 103 u64 desc_addr; 104 u64 device_addr; 105 u64 driver_addr; 106 u32 num_ent; 107 108 /* Resources for implementing the 
notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	u32 counter_set_id;
	bool initialized;
	int index;
	u32 virtq_id;
	struct mlx5_vdpa_net *ndev;
	u16 avail_idx;
	u16 used_idx;
	int fw_state;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};

static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
			return idx < 2;
		else
			return idx < 3;
	}

	return idx <= mvdev->max_idx;
}

#define MLX5V_MACVLAN_SIZE 256

struct mlx5_vdpa_net {
	struct mlx5_vdpa_dev mvdev;
	struct mlx5_vdpa_net_resources res;
	struct virtio_net_config config;
	struct mlx5_vdpa_virtqueue *vqs;
	struct vdpa_callback *event_cbs;

	/* Serialize vq resources creation and destruction. This is required
	 * since memory map might change and we need to destroy and create
	 * resources while the driver is operational.
	 */
	struct rw_semaphore reslock;
	struct mlx5_flow_table *rxft;
	bool setup;
	u32 cur_num_vqs;
	u32 rqt_size;
	struct notifier_block nb;
	struct vdpa_callback config_cb;
	struct mlx5_vdpa_wq_ent cvq_ent;
	struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
};

struct macvlan_node {
	struct hlist_node hlist;
	struct mlx5_flow_handle *ucast_rule;
	struct mlx5_flow_handle *mcast_rule;
	u64 macvlan;
};

static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);

static bool mlx5_vdpa_debug;

#define MLX5_CVQ_MAX_ENT 16

#define MLX5_LOG_VIO_FLAG(_feature)						\
	do {									\
		if (features & BIT_ULL(_feature))				\
			mlx5_vdpa_info(mvdev, "%s\n", #_feature);		\
	} while (0)

#define MLX5_LOG_VIO_STAT(_status)						\
	do {									\
		if (status & (_status))						\
			mlx5_vdpa_info(mvdev, "%s\n", #_status);		\
	} while (0)

/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
	return virtio_legacy_is_little_endian() ||
		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}

static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
		return 2;

	return mvdev->max_vqs;
}

static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	return idx == ctrl_vq_idx(mvdev);
}

static void print_status(struct mlx5_vdpa_dev
*mvdev, u8 status, bool set) 232 { 233 if (status & ~VALID_STATUS_MASK) 234 mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n", 235 status & ~VALID_STATUS_MASK); 236 237 if (!mlx5_vdpa_debug) 238 return; 239 240 mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get"); 241 if (set && !status) { 242 mlx5_vdpa_info(mvdev, "driver resets the device\n"); 243 return; 244 } 245 246 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE); 247 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER); 248 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK); 249 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK); 250 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET); 251 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED); 252 } 253 254 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set) 255 { 256 if (features & ~VALID_FEATURES_MASK) 257 mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n", 258 features & ~VALID_FEATURES_MASK); 259 260 if (!mlx5_vdpa_debug) 261 return; 262 263 mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads"); 264 if (!features) 265 mlx5_vdpa_info(mvdev, "all feature bits are cleared\n"); 266 267 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM); 268 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM); 269 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 270 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU); 271 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC); 272 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4); 273 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6); 274 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN); 275 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO); 276 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4); 277 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6); 278 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN); 279 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO); 280 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF); 281 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS); 282 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ); 283 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX); 284 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN); 285 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA); 286 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE); 287 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ); 288 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR); 289 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT); 290 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS); 291 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT); 292 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY); 293 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX); 294 MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY); 295 MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT); 296 MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1); 297 MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM); 298 MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED); 299 MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM); 300 MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV); 301 } 302 303 static int create_tis(struct mlx5_vdpa_net *ndev) 304 { 305 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 306 u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; 307 void *tisc; 308 int err; 309 310 tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); 311 MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn); 312 err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn); 313 if (err) 314 mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err); 315 316 return err; 317 } 318 319 static void destroy_tis(struct mlx5_vdpa_net *ndev) 320 { 321 mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn); 322 } 323 324 #define MLX5_VDPA_CQE_SIZE 64 325 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE) 326 327 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent) 328 { 329 struct mlx5_frag_buf 
*frag_buf = &buf->frag_buf; 330 u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE; 331 u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE; 332 int err; 333 334 err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf, 335 ndev->mvdev.mdev->priv.numa_node); 336 if (err) 337 return err; 338 339 mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); 340 341 buf->cqe_size = MLX5_VDPA_CQE_SIZE; 342 buf->nent = nent; 343 344 return 0; 345 } 346 347 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size) 348 { 349 struct mlx5_frag_buf *frag_buf = &umem->frag_buf; 350 351 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf, 352 ndev->mvdev.mdev->priv.numa_node); 353 } 354 355 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf) 356 { 357 mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf); 358 } 359 360 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n) 361 { 362 return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n); 363 } 364 365 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf) 366 { 367 struct mlx5_cqe64 *cqe64; 368 void *cqe; 369 int i; 370 371 for (i = 0; i < buf->nent; i++) { 372 cqe = get_cqe(vcq, i); 373 cqe64 = cqe; 374 cqe64->op_own = MLX5_CQE_INVALID << 4; 375 } 376 } 377 378 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n) 379 { 380 struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1)); 381 382 if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && 383 !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe))) 384 return cqe64; 385 386 return NULL; 387 } 388 389 static void rx_post(struct mlx5_vdpa_qp *vqp, int n) 390 { 391 vqp->head += n; 392 vqp->db.db[0] = cpu_to_be32(vqp->head); 393 } 394 395 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in, 396 struct mlx5_vdpa_virtqueue *mvq, u32 num_ent) 397 { 398 struct mlx5_vdpa_qp *vqp; 399 __be64 *pas; 400 void *qpc; 401 402 vqp = fw ? 
&mvq->fwqp : &mvq->vqqp; 403 MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid); 404 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 405 if (vqp->fw) { 406 /* Firmware QP is allocated by the driver for the firmware's 407 * use so we can skip part of the params as they will be chosen by firmware 408 */ 409 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 410 MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ); 411 MLX5_SET(qpc, qpc, no_sq, 1); 412 return; 413 } 414 415 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); 416 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); 417 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); 418 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 419 MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index); 420 MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 421 MLX5_SET(qpc, qpc, no_sq, 1); 422 MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn); 423 MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent)); 424 MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); 425 pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas); 426 mlx5_fill_page_frag_array(&vqp->frag_buf, pas); 427 } 428 429 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent) 430 { 431 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, 432 num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf, 433 ndev->mvdev.mdev->priv.numa_node); 434 } 435 436 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) 437 { 438 mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf); 439 } 440 441 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 442 struct mlx5_vdpa_qp *vqp) 443 { 444 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 445 int inlen = MLX5_ST_SZ_BYTES(create_qp_in); 446 u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; 447 void *qpc; 448 void *in; 449 int err; 450 451 if (!vqp->fw) { 452 vqp = &mvq->vqqp; 453 err = rq_buf_alloc(ndev, vqp, mvq->num_ent); 454 if (err) 455 return err; 456 457 err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db); 458 if (err) 459 goto err_db; 460 inlen += vqp->frag_buf.npages * sizeof(__be64); 461 } 462 463 in = kzalloc(inlen, GFP_KERNEL); 464 if (!in) { 465 err = -ENOMEM; 466 goto err_kzalloc; 467 } 468 469 qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent); 470 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 471 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); 472 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); 473 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); 474 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 475 if (!vqp->fw) 476 MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma); 477 MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); 478 err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); 479 kfree(in); 480 if (err) 481 goto err_kzalloc; 482 483 vqp->mqp.uid = ndev->mvdev.res.uid; 484 vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn); 485 486 if (!vqp->fw) 487 rx_post(vqp, mvq->num_ent); 488 489 return 0; 490 491 err_kzalloc: 492 if (!vqp->fw) 493 mlx5_db_free(ndev->mvdev.mdev, &vqp->db); 494 err_db: 495 if (!vqp->fw) 496 rq_buf_free(ndev, vqp); 497 498 return err; 499 } 500 501 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) 502 { 503 u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; 504 505 MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); 506 MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn); 507 MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid); 508 if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in)) 509 mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn); 
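	/* Only the driver-owned (non-firmware) QP has a doorbell record and an
	 * RQ frag buffer allocated in qp_create(), so only that end needs the
	 * explicit frees below; the firmware end owns no such resources.
	 */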
	if (!vqp->fw) {
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
		rq_buf_free(ndev, vqp);
	}
}

static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	struct mlx5_cqe64 *cqe64;

	cqe64 = next_cqe_sw(vcq);
	if (!cqe64)
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	struct vdpa_callback *event_cb;

	event_cb = &ndev->event_cbs[mvq->index];
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* make sure CQ consumer update is visible to the hardware before updating
	 * RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (event_cb->callback)
		event_cb->callback(event_cb->private);
}

static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record. We also let the vdpa core
			 * know about this so it passes it on to the virtio
			 * driver in the guest.
			 */
			mlx5_vdpa_handle_completions(mvq, num);
			num = 0;
		}
	}

	if (num)
		mlx5_vdpa_handle_completions(mvq, num);

	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}

static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vdpa_cq *vcq = &mvq->cq;
	__be64 *pas;
	int inlen;
	void *cqc;
	void *in;
	int err;
	int eqn;

	err = mlx5_db_alloc(mdev, &vcq->db);
	if (err)
		return err;

	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	vcq->mcq.cqe_sz = 64;

	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
	if (err)
		goto err_db;

	cq_frag_buf_init(vcq, &vcq->buf);

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_vzalloc;
	}

	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	/* Use vector 0 by default. Consider adding code to choose least used
	 * vector.
622 */ 623 err = mlx5_vector2eqn(mdev, 0, &eqn); 624 if (err) 625 goto err_vec; 626 627 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 628 MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent)); 629 MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index); 630 MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); 631 MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma); 632 633 err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out)); 634 if (err) 635 goto err_vec; 636 637 vcq->mcq.comp = mlx5_vdpa_cq_comp; 638 vcq->cqe = num_ent; 639 vcq->mcq.set_ci_db = vcq->db.db; 640 vcq->mcq.arm_db = vcq->db.db + 1; 641 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); 642 kfree(in); 643 return 0; 644 645 err_vec: 646 kfree(in); 647 err_vzalloc: 648 cq_frag_buf_free(ndev, &vcq->buf); 649 err_db: 650 mlx5_db_free(ndev->mvdev.mdev, &vcq->db); 651 return err; 652 } 653 654 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx) 655 { 656 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; 657 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 658 struct mlx5_vdpa_cq *vcq = &mvq->cq; 659 660 if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) { 661 mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn); 662 return; 663 } 664 cq_frag_buf_free(ndev, &vcq->buf); 665 mlx5_db_free(ndev->mvdev.mdev, &vcq->db); 666 } 667 668 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num, 669 struct mlx5_vdpa_umem **umemp) 670 { 671 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 672 int p_a; 673 int p_b; 674 675 switch (num) { 676 case 1: 677 p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a); 678 p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b); 679 *umemp = &mvq->umem1; 680 break; 681 case 2: 682 p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a); 683 p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b); 684 *umemp = &mvq->umem2; 685 break; 686 case 3: 687 p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a); 688 p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b); 689 *umemp = &mvq->umem3; 690 break; 691 } 692 (*umemp)->size = p_a * mvq->num_ent + p_b; 693 } 694 695 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem) 696 { 697 mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf); 698 } 699 700 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) 701 { 702 int inlen; 703 u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {}; 704 void *um; 705 void *in; 706 int err; 707 __be64 *pas; 708 struct mlx5_vdpa_umem *umem; 709 710 set_umem_size(ndev, mvq, num, &umem); 711 err = umem_frag_buf_alloc(ndev, umem, umem->size); 712 if (err) 713 return err; 714 715 inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages; 716 717 in = kzalloc(inlen, GFP_KERNEL); 718 if (!in) { 719 err = -ENOMEM; 720 goto err_in; 721 } 722 723 MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM); 724 MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid); 725 um = MLX5_ADDR_OF(create_umem_in, in, umem); 726 MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 727 MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages); 728 729 pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]); 730 mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW); 731 732 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 733 if (err) { 734 mlx5_vdpa_warn(&ndev->mvdev, "create 
umem(%d)\n", err); 735 goto err_cmd; 736 } 737 738 kfree(in); 739 umem->id = MLX5_GET(create_umem_out, out, umem_id); 740 741 return 0; 742 743 err_cmd: 744 kfree(in); 745 err_in: 746 umem_frag_buf_free(ndev, umem); 747 return err; 748 } 749 750 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) 751 { 752 u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {}; 753 u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {}; 754 struct mlx5_vdpa_umem *umem; 755 756 switch (num) { 757 case 1: 758 umem = &mvq->umem1; 759 break; 760 case 2: 761 umem = &mvq->umem2; 762 break; 763 case 3: 764 umem = &mvq->umem3; 765 break; 766 } 767 768 MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM); 769 MLX5_SET(destroy_umem_in, in, umem_id, umem->id); 770 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) 771 return; 772 773 umem_frag_buf_free(ndev, umem); 774 } 775 776 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 777 { 778 int num; 779 int err; 780 781 for (num = 1; num <= 3; num++) { 782 err = create_umem(ndev, mvq, num); 783 if (err) 784 goto err_umem; 785 } 786 return 0; 787 788 err_umem: 789 for (num--; num > 0; num--) 790 umem_destroy(ndev, mvq, num); 791 792 return err; 793 } 794 795 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 796 { 797 int num; 798 799 for (num = 3; num > 0; num--) 800 umem_destroy(ndev, mvq, num); 801 } 802 803 static int get_queue_type(struct mlx5_vdpa_net *ndev) 804 { 805 u32 type_mask; 806 807 type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type); 808 809 /* prefer split queue */ 810 if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT) 811 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT; 812 813 WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED)); 814 815 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED; 816 } 817 818 static bool vq_is_tx(u16 idx) 819 { 820 return idx % 2; 821 } 822 823 static u16 get_features_12_3(u64 features) 824 { 825 return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) | 826 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) | 827 (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) | 828 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6); 829 } 830 831 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev) 832 { 833 return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) & 834 BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 835 } 836 837 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 838 { 839 int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); 840 u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {}; 841 void *obj_context; 842 void *cmd_hdr; 843 void *vq_ctx; 844 void *in; 845 int err; 846 847 err = umems_create(ndev, mvq); 848 if (err) 849 return err; 850 851 in = kzalloc(inlen, GFP_KERNEL); 852 if (!in) { 853 err = -ENOMEM; 854 goto err_alloc; 855 } 856 857 cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr); 858 859 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 860 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 861 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 862 863 obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context); 864 MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); 865 MLX5_SET(virtio_net_q_object, obj_context, 
hw_used_index, mvq->used_idx); 866 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, 867 get_features_12_3(ndev->mvdev.actual_features)); 868 vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); 869 MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev)); 870 871 if (vq_is_tx(mvq->index)) 872 MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn); 873 874 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE); 875 MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index); 876 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); 877 MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent); 878 MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 879 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1))); 880 MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); 881 MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); 882 MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); 883 MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey); 884 MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id); 885 MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size); 886 MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id); 887 MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size); 888 MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id); 889 MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size); 890 MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); 891 if (counters_supported(&ndev->mvdev)) 892 MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id); 893 894 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 895 if (err) 896 goto err_cmd; 897 898 kfree(in); 899 mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 900 901 return 0; 902 903 err_cmd: 904 kfree(in); 905 err_alloc: 906 umems_destroy(ndev, mvq); 907 return err; 908 } 909 910 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 911 { 912 u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {}; 913 u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {}; 914 915 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode, 916 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 917 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id); 918 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid); 919 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type, 920 MLX5_OBJ_TYPE_VIRTIO_NET_Q); 921 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) { 922 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id); 923 return; 924 } 925 umems_destroy(ndev, mvq); 926 } 927 928 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) 929 { 930 return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn; 931 } 932 933 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) 934 { 935 return fw ? 
mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn; 936 } 937 938 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out, 939 int *outlen, u32 qpn, u32 rqpn) 940 { 941 void *qpc; 942 void *pp; 943 944 switch (cmd) { 945 case MLX5_CMD_OP_2RST_QP: 946 *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in); 947 *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out); 948 *in = kzalloc(*inlen, GFP_KERNEL); 949 *out = kzalloc(*outlen, GFP_KERNEL); 950 if (!*in || !*out) 951 goto outerr; 952 953 MLX5_SET(qp_2rst_in, *in, opcode, cmd); 954 MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid); 955 MLX5_SET(qp_2rst_in, *in, qpn, qpn); 956 break; 957 case MLX5_CMD_OP_RST2INIT_QP: 958 *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in); 959 *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out); 960 *in = kzalloc(*inlen, GFP_KERNEL); 961 *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL); 962 if (!*in || !*out) 963 goto outerr; 964 965 MLX5_SET(rst2init_qp_in, *in, opcode, cmd); 966 MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid); 967 MLX5_SET(rst2init_qp_in, *in, qpn, qpn); 968 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 969 MLX5_SET(qpc, qpc, remote_qpn, rqpn); 970 MLX5_SET(qpc, qpc, rwe, 1); 971 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 972 MLX5_SET(ads, pp, vhca_port_num, 1); 973 break; 974 case MLX5_CMD_OP_INIT2RTR_QP: 975 *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in); 976 *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out); 977 *in = kzalloc(*inlen, GFP_KERNEL); 978 *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL); 979 if (!*in || !*out) 980 goto outerr; 981 982 MLX5_SET(init2rtr_qp_in, *in, opcode, cmd); 983 MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid); 984 MLX5_SET(init2rtr_qp_in, *in, qpn, qpn); 985 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 986 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 987 MLX5_SET(qpc, qpc, log_msg_max, 30); 988 MLX5_SET(qpc, qpc, remote_qpn, rqpn); 989 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 990 MLX5_SET(ads, pp, fl, 1); 991 break; 992 case MLX5_CMD_OP_RTR2RTS_QP: 993 *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in); 994 *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out); 995 *in = kzalloc(*inlen, GFP_KERNEL); 996 *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL); 997 if (!*in || !*out) 998 goto outerr; 999 1000 MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd); 1001 MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid); 1002 MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn); 1003 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 1004 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1005 MLX5_SET(ads, pp, ack_timeout, 14); 1006 MLX5_SET(qpc, qpc, retry_count, 7); 1007 MLX5_SET(qpc, qpc, rnr_retry, 7); 1008 break; 1009 default: 1010 goto outerr_nullify; 1011 } 1012 1013 return; 1014 1015 outerr: 1016 kfree(*in); 1017 kfree(*out); 1018 outerr_nullify: 1019 *in = NULL; 1020 *out = NULL; 1021 } 1022 1023 static void free_inout(void *in, void *out) 1024 { 1025 kfree(in); 1026 kfree(out); 1027 } 1028 1029 /* Two QPs are used by each virtqueue. One is used by the driver and one by 1030 * firmware. The fw argument indicates whether the subjected QP is the one used 1031 * by firmware. 
1032 */ 1033 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd) 1034 { 1035 int outlen; 1036 int inlen; 1037 void *out; 1038 void *in; 1039 int err; 1040 1041 alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw)); 1042 if (!in || !out) 1043 return -ENOMEM; 1044 1045 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen); 1046 free_inout(in, out); 1047 return err; 1048 } 1049 1050 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1051 { 1052 int err; 1053 1054 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP); 1055 if (err) 1056 return err; 1057 1058 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP); 1059 if (err) 1060 return err; 1061 1062 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP); 1063 if (err) 1064 return err; 1065 1066 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP); 1067 if (err) 1068 return err; 1069 1070 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP); 1071 if (err) 1072 return err; 1073 1074 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP); 1075 if (err) 1076 return err; 1077 1078 return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP); 1079 } 1080 1081 struct mlx5_virtq_attr { 1082 u8 state; 1083 u16 available_index; 1084 u16 used_index; 1085 }; 1086 1087 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 1088 struct mlx5_virtq_attr *attr) 1089 { 1090 int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out); 1091 u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {}; 1092 void *out; 1093 void *obj_context; 1094 void *cmd_hdr; 1095 int err; 1096 1097 out = kzalloc(outlen, GFP_KERNEL); 1098 if (!out) 1099 return -ENOMEM; 1100 1101 cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr); 1102 1103 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 1104 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 1105 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); 1106 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1107 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen); 1108 if (err) 1109 goto err_cmd; 1110 1111 obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context); 1112 memset(attr, 0, sizeof(*attr)); 1113 attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); 1114 attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); 1115 attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index); 1116 kfree(out); 1117 return 0; 1118 1119 err_cmd: 1120 kfree(out); 1121 return err; 1122 } 1123 1124 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state) 1125 { 1126 int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); 1127 u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {}; 1128 void *obj_context; 1129 void *cmd_hdr; 1130 void *in; 1131 int err; 1132 1133 in = kzalloc(inlen, GFP_KERNEL); 1134 if (!in) 1135 return -ENOMEM; 1136 1137 cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr); 1138 1139 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); 1140 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 1141 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); 1142 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1143 1144 
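	/* Only the state field is changed here; modify_field_select tells the
	 * device which fields of the virtio_net_q_object are being updated.
	 */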
obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context); 1145 MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, 1146 MLX5_VIRTQ_MODIFY_MASK_STATE); 1147 MLX5_SET(virtio_net_q_object, obj_context, state, state); 1148 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 1149 kfree(in); 1150 if (!err) 1151 mvq->fw_state = state; 1152 1153 return err; 1154 } 1155 1156 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1157 { 1158 u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {}; 1159 u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {}; 1160 void *cmd_hdr; 1161 int err; 1162 1163 if (!counters_supported(&ndev->mvdev)) 1164 return 0; 1165 1166 cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr); 1167 1168 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 1169 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 1170 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1171 1172 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 1173 if (err) 1174 return err; 1175 1176 mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 1177 1178 return 0; 1179 } 1180 1181 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1182 { 1183 u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {}; 1184 u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {}; 1185 1186 if (!counters_supported(&ndev->mvdev)) 1187 return; 1188 1189 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 1190 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id); 1191 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid); 1192 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 1193 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) 1194 mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id); 1195 } 1196 1197 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1198 { 1199 u16 idx = mvq->index; 1200 int err; 1201 1202 if (!mvq->num_ent) 1203 return 0; 1204 1205 if (mvq->initialized) 1206 return 0; 1207 1208 err = cq_create(ndev, idx, mvq->num_ent); 1209 if (err) 1210 return err; 1211 1212 err = qp_create(ndev, mvq, &mvq->fwqp); 1213 if (err) 1214 goto err_fwqp; 1215 1216 err = qp_create(ndev, mvq, &mvq->vqqp); 1217 if (err) 1218 goto err_vqqp; 1219 1220 err = connect_qps(ndev, mvq); 1221 if (err) 1222 goto err_connect; 1223 1224 err = counter_set_alloc(ndev, mvq); 1225 if (err) 1226 goto err_counter; 1227 1228 err = create_virtqueue(ndev, mvq); 1229 if (err) 1230 goto err_connect; 1231 1232 if (mvq->ready) { 1233 err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); 1234 if (err) { 1235 mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n", 1236 idx, err); 1237 goto err_connect; 1238 } 1239 } 1240 1241 mvq->initialized = true; 1242 return 0; 1243 1244 err_connect: 1245 counter_set_dealloc(ndev, mvq); 1246 err_counter: 1247 qp_destroy(ndev, &mvq->vqqp); 1248 err_vqqp: 1249 qp_destroy(ndev, &mvq->fwqp); 1250 err_fwqp: 1251 cq_destroy(ndev, idx); 1252 return err; 1253 } 1254 1255 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1256 { 1257 struct mlx5_virtq_attr attr; 1258 1259 if (!mvq->initialized) 1260 return; 1261 
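	/* A virtqueue can only move to SUSPEND from the RDY state; a queue
	 * that was never made ready is left untouched.
	 */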
1262 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 1263 return; 1264 1265 if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) 1266 mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n"); 1267 1268 if (query_virtqueue(ndev, mvq, &attr)) { 1269 mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n"); 1270 return; 1271 } 1272 mvq->avail_idx = attr.available_index; 1273 mvq->used_idx = attr.used_index; 1274 } 1275 1276 static void suspend_vqs(struct mlx5_vdpa_net *ndev) 1277 { 1278 int i; 1279 1280 for (i = 0; i < ndev->mvdev.max_vqs; i++) 1281 suspend_vq(ndev, &ndev->vqs[i]); 1282 } 1283 1284 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1285 { 1286 if (!mvq->initialized) 1287 return; 1288 1289 suspend_vq(ndev, mvq); 1290 destroy_virtqueue(ndev, mvq); 1291 counter_set_dealloc(ndev, mvq); 1292 qp_destroy(ndev, &mvq->vqqp); 1293 qp_destroy(ndev, &mvq->fwqp); 1294 cq_destroy(ndev, mvq->index); 1295 mvq->initialized = false; 1296 } 1297 1298 static int create_rqt(struct mlx5_vdpa_net *ndev) 1299 { 1300 __be32 *list; 1301 void *rqtc; 1302 int inlen; 1303 void *in; 1304 int i, j; 1305 int err; 1306 1307 inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num); 1308 in = kzalloc(inlen, GFP_KERNEL); 1309 if (!in) 1310 return -ENOMEM; 1311 1312 MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid); 1313 rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); 1314 1315 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); 1316 MLX5_SET(rqtc, rqtc, rqt_max_size, ndev->rqt_size); 1317 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); 1318 for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2) 1319 list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id); 1320 1321 MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size); 1322 err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn); 1323 kfree(in); 1324 if (err) 1325 return err; 1326 1327 return 0; 1328 } 1329 1330 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1) 1331 1332 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num) 1333 { 1334 __be32 *list; 1335 void *rqtc; 1336 int inlen; 1337 void *in; 1338 int i, j; 1339 int err; 1340 1341 inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num); 1342 in = kzalloc(inlen, GFP_KERNEL); 1343 if (!in) 1344 return -ENOMEM; 1345 1346 MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid); 1347 MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS); 1348 rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); 1349 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); 1350 1351 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); 1352 for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2) 1353 list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id); 1354 1355 MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size); 1356 err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn); 1357 kfree(in); 1358 if (err) 1359 return err; 1360 1361 return 0; 1362 } 1363 1364 static void destroy_rqt(struct mlx5_vdpa_net *ndev) 1365 { 1366 mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn); 1367 } 1368 1369 static int create_tir(struct mlx5_vdpa_net *ndev) 1370 { 1371 #define HASH_IP_L4PORTS \ 1372 (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \ 1373 MLX5_HASH_FIELD_SEL_L4_DPORT) 1374 static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7, 1375 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94, 1376 
0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1, 1377 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59, 1378 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a }; 1379 void *rss_key; 1380 void *outer; 1381 void *tirc; 1382 void *in; 1383 int err; 1384 1385 in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL); 1386 if (!in) 1387 return -ENOMEM; 1388 1389 MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid); 1390 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); 1391 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); 1392 1393 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); 1394 MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); 1395 rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); 1396 memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key)); 1397 1398 outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); 1399 MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); 1400 MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); 1401 MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS); 1402 1403 MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn); 1404 MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn); 1405 1406 err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn); 1407 kfree(in); 1408 return err; 1409 } 1410 1411 static void destroy_tir(struct mlx5_vdpa_net *ndev) 1412 { 1413 mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn); 1414 } 1415 1416 #define MAX_STEERING_ENT 0x8000 1417 #define MAX_STEERING_GROUPS 2 1418 1419 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, 1420 u16 vid, bool tagged, 1421 struct mlx5_flow_handle **ucast, 1422 struct mlx5_flow_handle **mcast) 1423 { 1424 struct mlx5_flow_destination dest = {}; 1425 struct mlx5_flow_act flow_act = {}; 1426 struct mlx5_flow_handle *rule; 1427 struct mlx5_flow_spec *spec; 1428 void *headers_c; 1429 void *headers_v; 1430 u8 *dmac_c; 1431 u8 *dmac_v; 1432 int err; 1433 1434 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 1435 if (!spec) 1436 return -ENOMEM; 1437 1438 spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; 1439 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); 1440 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); 1441 dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); 1442 dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); 1443 memset(dmac_c, 0xff, ETH_ALEN); 1444 ether_addr_copy(dmac_v, mac); 1445 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); 1446 if (tagged) { 1447 MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); 1448 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); 1449 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, vid); 1450 } 1451 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 1452 dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; 1453 dest.tir_num = ndev->res.tirn; 1454 rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1); 1455 if (IS_ERR(rule)) 1456 return PTR_ERR(rule); 1457 1458 *ucast = rule; 1459 1460 memset(dmac_c, 0, ETH_ALEN); 1461 memset(dmac_v, 0, ETH_ALEN); 1462 dmac_c[0] = 1; 1463 dmac_v[0] = 1; 1464 rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1); 1465 kvfree(spec); 1466 if (IS_ERR(rule)) { 1467 err = PTR_ERR(rule); 1468 goto err_mcast; 1469 } 1470 1471 *mcast = rule; 1472 return 0; 1473 1474 err_mcast: 1475 mlx5_del_flow_rules(*ucast); 1476 return err; 1477 } 1478 1479 static void 
mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev, 1480 struct mlx5_flow_handle *ucast, 1481 struct mlx5_flow_handle *mcast) 1482 { 1483 mlx5_del_flow_rules(ucast); 1484 mlx5_del_flow_rules(mcast); 1485 } 1486 1487 static u64 search_val(u8 *mac, u16 vlan, bool tagged) 1488 { 1489 u64 val; 1490 1491 if (!tagged) 1492 vlan = MLX5V_UNTAGGED; 1493 1494 val = (u64)vlan << 48 | 1495 (u64)mac[0] << 40 | 1496 (u64)mac[1] << 32 | 1497 (u64)mac[2] << 24 | 1498 (u64)mac[3] << 16 | 1499 (u64)mac[4] << 8 | 1500 (u64)mac[5]; 1501 1502 return val; 1503 } 1504 1505 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value) 1506 { 1507 struct macvlan_node *pos; 1508 u32 idx; 1509 1510 idx = hash_64(value, 8); // tbd 8 1511 hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) { 1512 if (pos->macvlan == value) 1513 return pos; 1514 } 1515 return NULL; 1516 } 1517 1518 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) // vlan -> vid 1519 { 1520 struct macvlan_node *ptr; 1521 u64 val; 1522 u32 idx; 1523 int err; 1524 1525 val = search_val(mac, vlan, tagged); 1526 if (mac_vlan_lookup(ndev, val)) 1527 return -EEXIST; 1528 1529 ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); 1530 if (!ptr) 1531 return -ENOMEM; 1532 1533 err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, vlan, tagged, 1534 &ptr->ucast_rule, &ptr->mcast_rule); 1535 if (err) 1536 goto err_add; 1537 1538 ptr->macvlan = val; 1539 idx = hash_64(val, 8); 1540 hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]); 1541 return 0; 1542 1543 err_add: 1544 kfree(ptr); 1545 return err; 1546 } 1547 1548 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) 1549 { 1550 struct macvlan_node *ptr; 1551 1552 ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged)); 1553 if (!ptr) 1554 return; 1555 1556 hlist_del(&ptr->hlist); 1557 mlx5_vdpa_del_mac_vlan_rules(ndev, ptr->ucast_rule, ptr->mcast_rule); 1558 kfree(ptr); 1559 } 1560 1561 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev) 1562 { 1563 struct macvlan_node *pos; 1564 struct hlist_node *n; 1565 int i; 1566 1567 for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) { 1568 hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) { 1569 hlist_del(&pos->hlist); 1570 mlx5_vdpa_del_mac_vlan_rules(ndev, pos->ucast_rule, pos->mcast_rule); 1571 kfree(pos); 1572 } 1573 } 1574 } 1575 1576 static int setup_steering(struct mlx5_vdpa_net *ndev) 1577 { 1578 struct mlx5_flow_table_attr ft_attr = {}; 1579 struct mlx5_flow_namespace *ns; 1580 int err; 1581 1582 ft_attr.max_fte = MAX_STEERING_ENT; 1583 ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS; 1584 1585 ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); 1586 if (!ns) { 1587 mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n"); 1588 return -EOPNOTSUPP; 1589 } 1590 1591 ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); 1592 if (IS_ERR(ndev->rxft)) { 1593 mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n"); 1594 return PTR_ERR(ndev->rxft); 1595 } 1596 1597 err = mac_vlan_add(ndev, ndev->config.mac, 0, false); 1598 if (err) 1599 goto err_add; 1600 1601 return 0; 1602 1603 err_add: 1604 mlx5_destroy_flow_table(ndev->rxft); 1605 return err; 1606 } 1607 1608 static void teardown_steering(struct mlx5_vdpa_net *ndev) 1609 { 1610 clear_mac_vlan_table(ndev); 1611 mlx5_destroy_flow_table(ndev->rxft); 1612 } 1613 1614 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) 1615 { 1616 
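	/* VIRTIO_NET_CTRL_MAC_ADDR_SET flow: read the new MAC from the control
	 * virtqueue, move the MPFS L2 table entry on the physical function from
	 * the old address to the new one, then recreate the RX steering rules
	 * so traffic is forwarded to the new MAC. On failure, roll back to the
	 * previous address on a best-effort basis.
	 */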
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct mlx5_core_dev *pfmdev;
	size_t read;
	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];

	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
	switch (cmd) {
	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
		if (read != ETH_ALEN)
			break;

		if (!memcmp(ndev->config.mac, mac, 6)) {
			status = VIRTIO_NET_OK;
			break;
		}

		if (is_zero_ether_addr(mac))
			break;

		if (!is_zero_ether_addr(ndev->config.mac)) {
			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
					       ndev->config.mac);
				break;
			}
		}

		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
				       mac);
			break;
		}

		/* Back up the original MAC address so that we can restore it
		 * if adding the forward rules fails.
		 */
		memcpy(mac_back, ndev->config.mac, ETH_ALEN);

		memcpy(ndev->config.mac, mac, ETH_ALEN);

		/* Recreate the flow table entry so that packets are forwarded
		 * to the new MAC.
		 */
		mac_vlan_del(ndev, ndev->config.mac, 0, false);

		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");

			/* This path should rarely run, but double check anyway. */
			if (is_zero_ether_addr(mac_back)) {
				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
				break;
			}

			/* Try to restore the original MAC address to the MPFS table, and try to restore
			 * the forward rule entry.
1674 */ 1675 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) { 1676 mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n", 1677 ndev->config.mac); 1678 } 1679 1680 if (mlx5_mpfs_add_mac(pfmdev, mac_back)) { 1681 mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n", 1682 mac_back); 1683 } 1684 1685 memcpy(ndev->config.mac, mac_back, ETH_ALEN); 1686 1687 if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) 1688 mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n"); 1689 1690 break; 1691 } 1692 1693 status = VIRTIO_NET_OK; 1694 break; 1695 1696 default: 1697 break; 1698 } 1699 1700 return status; 1701 } 1702 1703 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps) 1704 { 1705 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1706 int cur_qps = ndev->cur_num_vqs / 2; 1707 int err; 1708 int i; 1709 1710 if (cur_qps > newqps) { 1711 err = modify_rqt(ndev, 2 * newqps); 1712 if (err) 1713 return err; 1714 1715 for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--) 1716 teardown_vq(ndev, &ndev->vqs[i]); 1717 1718 ndev->cur_num_vqs = 2 * newqps; 1719 } else { 1720 ndev->cur_num_vqs = 2 * newqps; 1721 for (i = cur_qps * 2; i < 2 * newqps; i++) { 1722 err = setup_vq(ndev, &ndev->vqs[i]); 1723 if (err) 1724 goto clean_added; 1725 } 1726 err = modify_rqt(ndev, 2 * newqps); 1727 if (err) 1728 goto clean_added; 1729 } 1730 return 0; 1731 1732 clean_added: 1733 for (--i; i >= 2 * cur_qps; --i) 1734 teardown_vq(ndev, &ndev->vqs[i]); 1735 1736 ndev->cur_num_vqs = 2 * cur_qps; 1737 1738 return err; 1739 } 1740 1741 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd) 1742 { 1743 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1744 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1745 struct mlx5_control_vq *cvq = &mvdev->cvq; 1746 struct virtio_net_ctrl_mq mq; 1747 size_t read; 1748 u16 newqps; 1749 1750 switch (cmd) { 1751 case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: 1752 /* This mq feature check aligns with pre-existing userspace 1753 * implementation. 1754 * 1755 * Without it, an untrusted driver could fake a multiqueue config 1756 * request down to a non-mq device that may cause kernel to 1757 * panic due to uninitialized resources for extra vqs. Even with 1758 * a well behaving guest driver, it is not expected to allow 1759 * changing the number of vqs on a non-mq device. 
1760 */ 1761 if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) 1762 break; 1763 1764 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq)); 1765 if (read != sizeof(mq)) 1766 break; 1767 1768 newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs); 1769 if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 1770 newqps > ndev->rqt_size) 1771 break; 1772 1773 if (ndev->cur_num_vqs == 2 * newqps) { 1774 status = VIRTIO_NET_OK; 1775 break; 1776 } 1777 1778 if (!change_num_qps(mvdev, newqps)) 1779 status = VIRTIO_NET_OK; 1780 1781 break; 1782 default: 1783 break; 1784 } 1785 1786 return status; 1787 } 1788 1789 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd) 1790 { 1791 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1792 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1793 struct mlx5_control_vq *cvq = &mvdev->cvq; 1794 __virtio16 vlan; 1795 size_t read; 1796 u16 id; 1797 1798 switch (cmd) { 1799 case VIRTIO_NET_CTRL_VLAN_ADD: 1800 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 1801 if (read != sizeof(vlan)) 1802 break; 1803 1804 id = mlx5vdpa16_to_cpu(mvdev, vlan); 1805 if (mac_vlan_add(ndev, ndev->config.mac, id, true)) 1806 break; 1807 1808 status = VIRTIO_NET_OK; 1809 break; 1810 case VIRTIO_NET_CTRL_VLAN_DEL: 1811 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 1812 if (read != sizeof(vlan)) 1813 break; 1814 1815 id = mlx5vdpa16_to_cpu(mvdev, vlan); 1816 mac_vlan_del(ndev, ndev->config.mac, id, true); 1817 break; 1818 default: 1819 break; 1820 } 1821 1822 return status; 1823 } 1824 1825 static void mlx5_cvq_kick_handler(struct work_struct *work) 1826 { 1827 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1828 struct virtio_net_ctrl_hdr ctrl; 1829 struct mlx5_vdpa_wq_ent *wqent; 1830 struct mlx5_vdpa_dev *mvdev; 1831 struct mlx5_control_vq *cvq; 1832 struct mlx5_vdpa_net *ndev; 1833 size_t read, write; 1834 int err; 1835 1836 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 1837 mvdev = wqent->mvdev; 1838 ndev = to_mlx5_vdpa_ndev(mvdev); 1839 cvq = &mvdev->cvq; 1840 1841 down_write(&ndev->reslock); 1842 1843 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 1844 goto out; 1845 1846 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 1847 goto out; 1848 1849 if (!cvq->ready) 1850 goto out; 1851 1852 while (true) { 1853 err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head, 1854 GFP_ATOMIC); 1855 if (err <= 0) 1856 break; 1857 1858 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl)); 1859 if (read != sizeof(ctrl)) 1860 break; 1861 1862 cvq->received_desc++; 1863 switch (ctrl.class) { 1864 case VIRTIO_NET_CTRL_MAC: 1865 status = handle_ctrl_mac(mvdev, ctrl.cmd); 1866 break; 1867 case VIRTIO_NET_CTRL_MQ: 1868 status = handle_ctrl_mq(mvdev, ctrl.cmd); 1869 break; 1870 case VIRTIO_NET_CTRL_VLAN: 1871 status = handle_ctrl_vlan(mvdev, ctrl.cmd); 1872 break; 1873 default: 1874 break; 1875 } 1876 1877 /* Make sure data is written before advancing index */ 1878 smp_wmb(); 1879 1880 write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status)); 1881 vringh_complete_iotlb(&cvq->vring, cvq->head, write); 1882 vringh_kiov_cleanup(&cvq->riov); 1883 vringh_kiov_cleanup(&cvq->wiov); 1884 1885 if (vringh_need_notify_iotlb(&cvq->vring)) 1886 vringh_notify(&cvq->vring); 1887 1888 cvq->completed_desc++; 1889 queue_work(mvdev->wq, &wqent->work); 1890 break; 1891 } 1892 1893 out: 1894 up_write(&ndev->reslock); 1895 } 1896 1897 
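/* Kick handling below is split by queue type: the control VQ is emulated in
 * software, so kicks on its index are deferred to the cvq_ent work item
 * serviced by mlx5_cvq_kick_handler() above; data VQs are offloaded to the
 * device, so a kick reduces to writing the queue index to the device kick
 * doorbell, i.e. iowrite16(idx, ndev->mvdev.res.kick_addr).
 */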
static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) 1898 { 1899 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 1900 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1901 struct mlx5_vdpa_virtqueue *mvq; 1902 1903 if (!is_index_valid(mvdev, idx)) 1904 return; 1905 1906 if (unlikely(is_ctrl_vq_idx(mvdev, idx))) { 1907 if (!mvdev->wq || !mvdev->cvq.ready) 1908 return; 1909 1910 queue_work(mvdev->wq, &ndev->cvq_ent.work); 1911 return; 1912 } 1913 1914 mvq = &ndev->vqs[idx]; 1915 if (unlikely(!mvq->ready)) 1916 return; 1917 1918 iowrite16(idx, ndev->mvdev.res.kick_addr); 1919 } 1920 1921 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area, 1922 u64 driver_area, u64 device_area) 1923 { 1924 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 1925 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1926 struct mlx5_vdpa_virtqueue *mvq; 1927 1928 if (!is_index_valid(mvdev, idx)) 1929 return -EINVAL; 1930 1931 if (is_ctrl_vq_idx(mvdev, idx)) { 1932 mvdev->cvq.desc_addr = desc_area; 1933 mvdev->cvq.device_addr = device_area; 1934 mvdev->cvq.driver_addr = driver_area; 1935 return 0; 1936 } 1937 1938 mvq = &ndev->vqs[idx]; 1939 mvq->desc_addr = desc_area; 1940 mvq->device_addr = device_area; 1941 mvq->driver_addr = driver_area; 1942 return 0; 1943 } 1944 1945 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) 1946 { 1947 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 1948 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1949 struct mlx5_vdpa_virtqueue *mvq; 1950 1951 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) 1952 return; 1953 1954 mvq = &ndev->vqs[idx]; 1955 mvq->num_ent = num; 1956 } 1957 1958 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb) 1959 { 1960 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 1961 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1962 1963 ndev->event_cbs[idx] = *cb; 1964 } 1965 1966 static void mlx5_cvq_notify(struct vringh *vring) 1967 { 1968 struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring); 1969 1970 if (!cvq->event_cb.callback) 1971 return; 1972 1973 cvq->event_cb.callback(cvq->event_cb.private); 1974 } 1975 1976 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready) 1977 { 1978 struct mlx5_control_vq *cvq = &mvdev->cvq; 1979 1980 cvq->ready = ready; 1981 if (!ready) 1982 return; 1983 1984 cvq->vring.notify = mlx5_cvq_notify; 1985 } 1986 1987 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready) 1988 { 1989 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 1990 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1991 struct mlx5_vdpa_virtqueue *mvq; 1992 1993 if (!mvdev->actual_features) 1994 return; 1995 1996 if (!is_index_valid(mvdev, idx)) 1997 return; 1998 1999 if (is_ctrl_vq_idx(mvdev, idx)) { 2000 set_cvq_ready(mvdev, ready); 2001 return; 2002 } 2003 2004 mvq = &ndev->vqs[idx]; 2005 if (!ready) 2006 suspend_vq(ndev, mvq); 2007 2008 mvq->ready = ready; 2009 } 2010 2011 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx) 2012 { 2013 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2014 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2015 2016 if (!is_index_valid(mvdev, idx)) 2017 return false; 2018 2019 if (is_ctrl_vq_idx(mvdev, idx)) 2020 return mvdev->cvq.ready; 2021 2022 return ndev->vqs[idx].ready; 2023 } 2024 2025 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, 2026 const struct vdpa_vq_state *state) 2027 { 
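	/* Data virtqueue indices can only be programmed while the firmware
	 * virtqueue object is not in the RDY state; the control virtqueue is
	 * backed by vringh, so only its last_avail_idx needs updating.
	 */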
2028 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2029 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2030 struct mlx5_vdpa_virtqueue *mvq; 2031 2032 if (!is_index_valid(mvdev, idx)) 2033 return -EINVAL; 2034 2035 if (is_ctrl_vq_idx(mvdev, idx)) { 2036 mvdev->cvq.vring.last_avail_idx = state->split.avail_index; 2037 return 0; 2038 } 2039 2040 mvq = &ndev->vqs[idx]; 2041 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) { 2042 mlx5_vdpa_warn(mvdev, "can't modify available index\n"); 2043 return -EINVAL; 2044 } 2045 2046 mvq->used_idx = state->split.avail_index; 2047 mvq->avail_idx = state->split.avail_index; 2048 return 0; 2049 } 2050 2051 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state) 2052 { 2053 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2054 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2055 struct mlx5_vdpa_virtqueue *mvq; 2056 struct mlx5_virtq_attr attr; 2057 int err; 2058 2059 if (!is_index_valid(mvdev, idx)) 2060 return -EINVAL; 2061 2062 if (is_ctrl_vq_idx(mvdev, idx)) { 2063 state->split.avail_index = mvdev->cvq.vring.last_avail_idx; 2064 return 0; 2065 } 2066 2067 mvq = &ndev->vqs[idx]; 2068 /* If the virtq object was destroyed, use the value saved by 2069 * suspend_vq right before destruction. This caters for userspace 2070 * that cares about emulating the index after the vq is stopped. 2071 */ 2072 if (!mvq->initialized) { 2073 /* Firmware returns a wrong value for the available index. 2074 * Since both values should be identical, we take the value of 2075 * used_idx which is reported correctly. 2076 */ 2077 state->split.avail_index = mvq->used_idx; 2078 return 0; 2079 } 2080 2081 err = query_virtqueue(ndev, mvq, &attr); 2082 if (err) { 2083 mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n"); 2084 return err; 2085 } 2086 state->split.avail_index = attr.used_index; 2087 return 0; 2088 } 2089 2090 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev) 2091 { 2092 return PAGE_SIZE; 2093 } 2094 2095 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdpa, u16 idx) 2096 { 2097 return 0; 2098 } 2099 2100 enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9, 2101 MLX5_VIRTIO_NET_F_CSUM = 1 << 10, 2102 MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11, 2103 MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12, 2104 }; 2105 2106 static u64 mlx_to_virtio_features(u16 dev_features) 2107 { 2108 u64 result = 0; 2109 2110 if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM) 2111 result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM); 2112 if (dev_features & MLX5_VIRTIO_NET_F_CSUM) 2113 result |= BIT_ULL(VIRTIO_NET_F_CSUM); 2114 if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6) 2115 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6); 2116 if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4) 2117 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4); 2118 2119 return result; 2120 } 2121 2122 static u64 get_supported_features(struct mlx5_core_dev *mdev) 2123 { 2124 u64 mlx_vdpa_features = 0; 2125 u16 dev_features; 2126 2127 dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask); 2128 mlx_vdpa_features |= mlx_to_virtio_features(dev_features); 2129 if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0)) 2130 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1); 2131 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM); 2132 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ); 2133 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); 2134 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ); 2135 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS); 2136 mlx_vdpa_features |=
BIT_ULL(VIRTIO_NET_F_MTU); 2137 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN); 2138 2139 return mlx_vdpa_features; 2140 } 2141 2142 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev) 2143 { 2144 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2145 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2146 2147 print_features(mvdev, ndev->mvdev.mlx_features, false); 2148 return ndev->mvdev.mlx_features; 2149 } 2150 2151 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) 2152 { 2153 /* Minimum features to expect */ 2154 if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) 2155 return -EOPNOTSUPP; 2156 2157 /* Double check the feature combination sent down by the driver. 2158 * Fail invalid features due to absence of the feature they depend on. 2159 * 2160 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit 2161 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ". 2162 * By failing the invalid features sent down by untrusted drivers, 2163 * we're assured that the assumptions made by is_index_valid() and 2164 * is_ctrl_vq_idx() will not be compromised. 2165 */ 2166 if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) == 2167 BIT_ULL(VIRTIO_NET_F_MQ)) 2168 return -EINVAL; 2169 2170 return 0; 2171 } 2172 2173 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) 2174 { 2175 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2176 struct mlx5_control_vq *cvq = &mvdev->cvq; 2177 int err; 2178 int i; 2179 2180 for (i = 0; i < mvdev->max_vqs; i++) { 2181 err = setup_vq(ndev, &ndev->vqs[i]); 2182 if (err) 2183 goto err_vq; 2184 } 2185 2186 if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { 2187 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, 2188 MLX5_CVQ_MAX_ENT, false, 2189 (struct vring_desc *)(uintptr_t)cvq->desc_addr, 2190 (struct vring_avail *)(uintptr_t)cvq->driver_addr, 2191 (struct vring_used *)(uintptr_t)cvq->device_addr); 2192 if (err) 2193 goto err_vq; 2194 } 2195 2196 return 0; 2197 2198 err_vq: 2199 for (--i; i >= 0; i--) 2200 teardown_vq(ndev, &ndev->vqs[i]); 2201 2202 return err; 2203 } 2204 2205 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev) 2206 { 2207 struct mlx5_vdpa_virtqueue *mvq; 2208 int i; 2209 2210 for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) { 2211 mvq = &ndev->vqs[i]; 2212 if (!mvq->initialized) 2213 continue; 2214 2215 teardown_vq(ndev, mvq); 2216 } 2217 } 2218 2219 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev) 2220 { 2221 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) { 2222 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) { 2223 /* MQ supported. CVQ index is right above the last data virtqueue's */ 2224 mvdev->max_idx = mvdev->max_vqs; 2225 } else { 2226 /* Only CVQ supported. Data virtqueues occupy indices 0 and 1.
2227 * CVQ gets index 2 2228 */ 2229 mvdev->max_idx = 2; 2230 } 2231 } else { 2232 /* Two data virtqueues only: one for rx and one for tx */ 2233 mvdev->max_idx = 1; 2234 } 2235 } 2236 2237 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) 2238 { 2239 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2240 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2241 int err; 2242 2243 print_features(mvdev, features, true); 2244 2245 err = verify_driver_features(mvdev, features); 2246 if (err) 2247 return err; 2248 2249 ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; 2250 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)) 2251 ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs); 2252 else 2253 ndev->rqt_size = 1; 2254 2255 ndev->cur_num_vqs = 2 * ndev->rqt_size; 2256 2257 update_cvq_info(mvdev); 2258 return err; 2259 } 2260 2261 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb) 2262 { 2263 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2264 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2265 2266 ndev->config_cb = *cb; 2267 } 2268 2269 #define MLX5_VDPA_MAX_VQ_ENTRIES 256 2270 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev) 2271 { 2272 return MLX5_VDPA_MAX_VQ_ENTRIES; 2273 } 2274 2275 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev) 2276 { 2277 return VIRTIO_ID_NET; 2278 } 2279 2280 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev) 2281 { 2282 return PCI_VENDOR_ID_MELLANOX; 2283 } 2284 2285 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev) 2286 { 2287 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2288 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2289 2290 print_status(mvdev, ndev->mvdev.status, false); 2291 return ndev->mvdev.status; 2292 } 2293 2294 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 2295 { 2296 struct mlx5_vq_restore_info *ri = &mvq->ri; 2297 struct mlx5_virtq_attr attr = {}; 2298 int err; 2299 2300 if (mvq->initialized) { 2301 err = query_virtqueue(ndev, mvq, &attr); 2302 if (err) 2303 return err; 2304 } 2305 2306 ri->avail_index = attr.available_index; 2307 ri->used_index = attr.used_index; 2308 ri->ready = mvq->ready; 2309 ri->num_ent = mvq->num_ent; 2310 ri->desc_addr = mvq->desc_addr; 2311 ri->device_addr = mvq->device_addr; 2312 ri->driver_addr = mvq->driver_addr; 2313 ri->restore = true; 2314 return 0; 2315 } 2316 2317 static int save_channels_info(struct mlx5_vdpa_net *ndev) 2318 { 2319 int i; 2320 2321 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2322 memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri)); 2323 save_channel_info(ndev, &ndev->vqs[i]); 2324 } 2325 return 0; 2326 } 2327 2328 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev) 2329 { 2330 int i; 2331 2332 for (i = 0; i < ndev->mvdev.max_vqs; i++) 2333 memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 2334 } 2335 2336 static void restore_channels_info(struct mlx5_vdpa_net *ndev) 2337 { 2338 struct mlx5_vdpa_virtqueue *mvq; 2339 struct mlx5_vq_restore_info *ri; 2340 int i; 2341 2342 mlx5_clear_vqs(ndev); 2343 init_mvqs(ndev); 2344 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2345 mvq = &ndev->vqs[i]; 2346 ri = &mvq->ri; 2347 if (!ri->restore) 2348 continue; 2349 2350 mvq->avail_idx = ri->avail_index; 2351 mvq->used_idx = ri->used_index; 2352 mvq->ready = ri->ready; 2353 mvq->num_ent = ri->num_ent; 2354 mvq->desc_addr = ri->desc_addr; 2355 mvq->device_addr = ri->device_addr; 
2356 mvq->driver_addr = ri->driver_addr; 2357 } 2358 } 2359 2360 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) 2361 { 2362 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2363 int err; 2364 2365 suspend_vqs(ndev); 2366 err = save_channels_info(ndev); 2367 if (err) 2368 goto err_mr; 2369 2370 teardown_driver(ndev); 2371 mlx5_vdpa_destroy_mr(mvdev); 2372 err = mlx5_vdpa_create_mr(mvdev, iotlb); 2373 if (err) 2374 goto err_mr; 2375 2376 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 2377 goto err_mr; 2378 2379 restore_channels_info(ndev); 2380 err = setup_driver(mvdev); 2381 if (err) 2382 goto err_setup; 2383 2384 return 0; 2385 2386 err_setup: 2387 mlx5_vdpa_destroy_mr(mvdev); 2388 err_mr: 2389 return err; 2390 } 2391 2392 /* reslock must be held for this function */ 2393 static int setup_driver(struct mlx5_vdpa_dev *mvdev) 2394 { 2395 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2396 int err; 2397 2398 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 2399 2400 if (ndev->setup) { 2401 mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n"); 2402 err = 0; 2403 goto out; 2404 } 2405 err = setup_virtqueues(mvdev); 2406 if (err) { 2407 mlx5_vdpa_warn(mvdev, "setup_virtqueues\n"); 2408 goto out; 2409 } 2410 2411 err = create_rqt(ndev); 2412 if (err) { 2413 mlx5_vdpa_warn(mvdev, "create_rqt\n"); 2414 goto err_rqt; 2415 } 2416 2417 err = create_tir(ndev); 2418 if (err) { 2419 mlx5_vdpa_warn(mvdev, "create_tir\n"); 2420 goto err_tir; 2421 } 2422 2423 err = setup_steering(ndev); 2424 if (err) { 2425 mlx5_vdpa_warn(mvdev, "setup_steering\n"); 2426 goto err_fwd; 2427 } 2428 ndev->setup = true; 2429 2430 return 0; 2431 2432 err_fwd: 2433 destroy_tir(ndev); 2434 err_tir: 2435 destroy_rqt(ndev); 2436 err_rqt: 2437 teardown_virtqueues(ndev); 2438 out: 2439 return err; 2440 } 2441 2442 /* reslock must be held for this function */ 2443 static void teardown_driver(struct mlx5_vdpa_net *ndev) 2444 { 2445 2446 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 2447 2448 if (!ndev->setup) 2449 return; 2450 2451 teardown_steering(ndev); 2452 destroy_tir(ndev); 2453 destroy_rqt(ndev); 2454 teardown_virtqueues(ndev); 2455 ndev->setup = false; 2456 } 2457 2458 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) 2459 { 2460 int i; 2461 2462 for (i = 0; i < ndev->mvdev.max_vqs; i++) 2463 ndev->vqs[i].ready = false; 2464 2465 ndev->mvdev.cvq.ready = false; 2466 } 2467 2468 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) 2469 { 2470 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2471 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2472 int err; 2473 2474 print_status(mvdev, status, true); 2475 2476 down_write(&ndev->reslock); 2477 2478 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { 2479 if (status & VIRTIO_CONFIG_S_DRIVER_OK) { 2480 err = setup_driver(mvdev); 2481 if (err) { 2482 mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); 2483 goto err_setup; 2484 } 2485 } else { 2486 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); 2487 goto err_clear; 2488 } 2489 } 2490 2491 ndev->mvdev.status = status; 2492 up_write(&ndev->reslock); 2493 return; 2494 2495 err_setup: 2496 mlx5_vdpa_destroy_mr(&ndev->mvdev); 2497 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; 2498 err_clear: 2499 up_write(&ndev->reslock); 2500 } 2501 2502 static int mlx5_vdpa_reset(struct vdpa_device *vdev) 2503 { 2504 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2505 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2506 2507 
print_status(mvdev, 0, true); 2508 mlx5_vdpa_info(mvdev, "performing device reset\n"); 2509 2510 down_write(&ndev->reslock); 2511 teardown_driver(ndev); 2512 clear_vqs_ready(ndev); 2513 mlx5_vdpa_destroy_mr(&ndev->mvdev); 2514 ndev->mvdev.status = 0; 2515 ndev->cur_num_vqs = 0; 2516 ndev->mvdev.cvq.received_desc = 0; 2517 ndev->mvdev.cvq.completed_desc = 0; 2518 memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1)); 2519 ndev->mvdev.actual_features = 0; 2520 ++mvdev->generation; 2521 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 2522 if (mlx5_vdpa_create_mr(mvdev, NULL)) 2523 mlx5_vdpa_warn(mvdev, "create MR failed\n"); 2524 } 2525 up_write(&ndev->reslock); 2526 2527 return 0; 2528 } 2529 2530 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev) 2531 { 2532 return sizeof(struct virtio_net_config); 2533 } 2534 2535 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, 2536 unsigned int len) 2537 { 2538 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2539 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2540 2541 if (offset + len <= sizeof(struct virtio_net_config)) 2542 memcpy(buf, (u8 *)&ndev->config + offset, len); 2543 } 2544 2545 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, 2546 unsigned int len) 2547 { 2548 /* not supported */ 2549 } 2550 2551 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) 2552 { 2553 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2554 2555 return mvdev->generation; 2556 } 2557 2558 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, 2559 struct vhost_iotlb *iotlb) 2560 { 2561 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2562 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2563 bool change_map; 2564 int err; 2565 2566 down_write(&ndev->reslock); 2567 2568 err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map); 2569 if (err) { 2570 mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err); 2571 goto err; 2572 } 2573 2574 if (change_map) 2575 err = mlx5_vdpa_change_map(mvdev, iotlb); 2576 2577 err: 2578 up_write(&ndev->reslock); 2579 return err; 2580 } 2581 2582 static void mlx5_vdpa_free(struct vdpa_device *vdev) 2583 { 2584 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2585 struct mlx5_core_dev *pfmdev; 2586 struct mlx5_vdpa_net *ndev; 2587 2588 ndev = to_mlx5_vdpa_ndev(mvdev); 2589 2590 free_resources(ndev); 2591 mlx5_vdpa_destroy_mr(mvdev); 2592 if (!is_zero_ether_addr(ndev->config.mac)) { 2593 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); 2594 mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); 2595 } 2596 mlx5_vdpa_free_resources(&ndev->mvdev); 2597 kfree(ndev->event_cbs); 2598 kfree(ndev->vqs); 2599 } 2600 2601 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx) 2602 { 2603 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2604 struct vdpa_notification_area ret = {}; 2605 struct mlx5_vdpa_net *ndev; 2606 phys_addr_t addr; 2607 2608 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) 2609 return ret; 2610 2611 /* If SF BAR size is smaller than PAGE_SIZE, do not use direct 2612 * notification to avoid the risk of mapping pages that contain BAR of more 2613 * than one SF 2614 */ 2615 if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT) 2616 return ret; 2617 2618 ndev = to_mlx5_vdpa_ndev(mvdev); 2619 addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr; 2620 ret.addr = addr; 2621 ret.size = PAGE_SIZE; 2622 return ret; 2623 } 2624 2625 
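/* No dedicated per-virtqueue interrupt is exposed for userspace to map,
 * so get_vq_irq() simply reports -EOPNOTSUPP.
 */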
static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx) 2626 { 2627 return -EOPNOTSUPP; 2628 } 2629 2630 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev) 2631 { 2632 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2633 2634 return mvdev->actual_features; 2635 } 2636 2637 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 2638 u64 *received_desc, u64 *completed_desc) 2639 { 2640 u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {}; 2641 u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {}; 2642 void *cmd_hdr; 2643 void *ctx; 2644 int err; 2645 2646 if (!counters_supported(&ndev->mvdev)) 2647 return -EOPNOTSUPP; 2648 2649 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 2650 return -EAGAIN; 2651 2652 cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr); 2653 2654 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 2655 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 2656 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 2657 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id); 2658 2659 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 2660 if (err) 2661 return err; 2662 2663 ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters); 2664 *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc); 2665 *completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc); 2666 return 0; 2667 } 2668 2669 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx, 2670 struct sk_buff *msg, 2671 struct netlink_ext_ack *extack) 2672 { 2673 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2674 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2675 struct mlx5_vdpa_virtqueue *mvq; 2676 struct mlx5_control_vq *cvq; 2677 u64 received_desc; 2678 u64 completed_desc; 2679 int err = 0; 2680 2681 down_read(&ndev->reslock); 2682 if (!is_index_valid(mvdev, idx)) { 2683 NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid"); 2684 err = -EINVAL; 2685 goto out_err; 2686 } 2687 2688 if (idx == ctrl_vq_idx(mvdev)) { 2689 cvq = &mvdev->cvq; 2690 received_desc = cvq->received_desc; 2691 completed_desc = cvq->completed_desc; 2692 goto out; 2693 } 2694 2695 mvq = &ndev->vqs[idx]; 2696 err = counter_set_query(ndev, mvq, &received_desc, &completed_desc); 2697 if (err) { 2698 NL_SET_ERR_MSG_MOD(extack, "failed to query hardware"); 2699 goto out_err; 2700 } 2701 2702 out: 2703 err = -EMSGSIZE; 2704 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc")) 2705 goto out_err; 2706 2707 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc, 2708 VDPA_ATTR_PAD)) 2709 goto out_err; 2710 2711 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc")) 2712 goto out_err; 2713 2714 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc, 2715 VDPA_ATTR_PAD)) 2716 goto out_err; 2717 2718 err = 0; 2719 out_err: 2720 up_read(&ndev->reslock); 2721 return err; 2722 } 2723 2724 static const struct vdpa_config_ops mlx5_vdpa_ops = { 2725 .set_vq_address = mlx5_vdpa_set_vq_address, 2726 .set_vq_num = mlx5_vdpa_set_vq_num, 2727 .kick_vq = mlx5_vdpa_kick_vq, 2728 .set_vq_cb = mlx5_vdpa_set_vq_cb, 2729 .set_vq_ready = mlx5_vdpa_set_vq_ready, 2730 .get_vq_ready = mlx5_vdpa_get_vq_ready, 2731 .set_vq_state = mlx5_vdpa_set_vq_state, 2732 .get_vq_state = mlx5_vdpa_get_vq_state, 2733 .get_vendor_vq_stats = 
mlx5_vdpa_get_vendor_vq_stats, 2734 .get_vq_notification = mlx5_get_vq_notification, 2735 .get_vq_irq = mlx5_get_vq_irq, 2736 .get_vq_align = mlx5_vdpa_get_vq_align, 2737 .get_vq_group = mlx5_vdpa_get_vq_group, 2738 .get_device_features = mlx5_vdpa_get_device_features, 2739 .set_driver_features = mlx5_vdpa_set_driver_features, 2740 .get_driver_features = mlx5_vdpa_get_driver_features, 2741 .set_config_cb = mlx5_vdpa_set_config_cb, 2742 .get_vq_num_max = mlx5_vdpa_get_vq_num_max, 2743 .get_device_id = mlx5_vdpa_get_device_id, 2744 .get_vendor_id = mlx5_vdpa_get_vendor_id, 2745 .get_status = mlx5_vdpa_get_status, 2746 .set_status = mlx5_vdpa_set_status, 2747 .reset = mlx5_vdpa_reset, 2748 .get_config_size = mlx5_vdpa_get_config_size, 2749 .get_config = mlx5_vdpa_get_config, 2750 .set_config = mlx5_vdpa_set_config, 2751 .get_generation = mlx5_vdpa_get_generation, 2752 .set_map = mlx5_vdpa_set_map, 2753 .free = mlx5_vdpa_free, 2754 }; 2755 2756 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) 2757 { 2758 u16 hw_mtu; 2759 int err; 2760 2761 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); 2762 if (err) 2763 return err; 2764 2765 *mtu = hw_mtu - MLX5V_ETH_HARD_MTU; 2766 return 0; 2767 } 2768 2769 static int alloc_resources(struct mlx5_vdpa_net *ndev) 2770 { 2771 struct mlx5_vdpa_net_resources *res = &ndev->res; 2772 int err; 2773 2774 if (res->valid) { 2775 mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n"); 2776 return -EEXIST; 2777 } 2778 2779 err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn); 2780 if (err) 2781 return err; 2782 2783 err = create_tis(ndev); 2784 if (err) 2785 goto err_tis; 2786 2787 res->valid = true; 2788 2789 return 0; 2790 2791 err_tis: 2792 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 2793 return err; 2794 } 2795 2796 static void free_resources(struct mlx5_vdpa_net *ndev) 2797 { 2798 struct mlx5_vdpa_net_resources *res = &ndev->res; 2799 2800 if (!res->valid) 2801 return; 2802 2803 destroy_tis(ndev); 2804 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 2805 res->valid = false; 2806 } 2807 2808 static void init_mvqs(struct mlx5_vdpa_net *ndev) 2809 { 2810 struct mlx5_vdpa_virtqueue *mvq; 2811 int i; 2812 2813 for (i = 0; i < ndev->mvdev.max_vqs; ++i) { 2814 mvq = &ndev->vqs[i]; 2815 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 2816 mvq->index = i; 2817 mvq->ndev = ndev; 2818 mvq->fwqp.fw = true; 2819 } 2820 for (; i < ndev->mvdev.max_vqs; i++) { 2821 mvq = &ndev->vqs[i]; 2822 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 2823 mvq->index = i; 2824 mvq->ndev = ndev; 2825 } 2826 } 2827 2828 struct mlx5_vdpa_mgmtdev { 2829 struct vdpa_mgmt_dev mgtdev; 2830 struct mlx5_adev *madev; 2831 struct mlx5_vdpa_net *ndev; 2832 }; 2833 2834 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) 2835 { 2836 u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {}; 2837 u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {}; 2838 int err; 2839 2840 MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); 2841 MLX5_SET(query_vport_state_in, in, op_mod, opmod); 2842 MLX5_SET(query_vport_state_in, in, vport_number, vport); 2843 if (vport) 2844 MLX5_SET(query_vport_state_in, in, other_vport, 1); 2845 2846 err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out); 2847 if (err) 2848 return 0; 2849 2850 return MLX5_GET(query_vport_state_out, out, state); 2851 } 2852 2853 static bool get_link_state(struct mlx5_vdpa_dev *mvdev) 2854 { 2855 if (query_vport_state(mvdev->mdev, 
MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) == 2856 VPORT_STATE_UP) 2857 return true; 2858 2859 return false; 2860 } 2861 2862 static void update_carrier(struct work_struct *work) 2863 { 2864 struct mlx5_vdpa_wq_ent *wqent; 2865 struct mlx5_vdpa_dev *mvdev; 2866 struct mlx5_vdpa_net *ndev; 2867 2868 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 2869 mvdev = wqent->mvdev; 2870 ndev = to_mlx5_vdpa_ndev(mvdev); 2871 if (get_link_state(mvdev)) 2872 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); 2873 else 2874 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); 2875 2876 if (ndev->config_cb.callback) 2877 ndev->config_cb.callback(ndev->config_cb.private); 2878 2879 kfree(wqent); 2880 } 2881 2882 static int event_handler(struct notifier_block *nb, unsigned long event, void *param) 2883 { 2884 struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb); 2885 struct mlx5_eqe *eqe = param; 2886 int ret = NOTIFY_DONE; 2887 struct mlx5_vdpa_wq_ent *wqent; 2888 2889 if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { 2890 switch (eqe->sub_type) { 2891 case MLX5_PORT_CHANGE_SUBTYPE_DOWN: 2892 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: 2893 wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); 2894 if (!wqent) 2895 return NOTIFY_DONE; 2896 2897 wqent->mvdev = &ndev->mvdev; 2898 INIT_WORK(&wqent->work, update_carrier); 2899 queue_work(ndev->mvdev.wq, &wqent->work); 2900 ret = NOTIFY_OK; 2901 break; 2902 default: 2903 return NOTIFY_DONE; 2904 } 2905 return ret; 2906 } 2907 return ret; 2908 } 2909 2910 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu) 2911 { 2912 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); 2913 void *in; 2914 int err; 2915 2916 in = kvzalloc(inlen, GFP_KERNEL); 2917 if (!in) 2918 return -ENOMEM; 2919 2920 MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1); 2921 MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, 2922 mtu + MLX5V_ETH_HARD_MTU); 2923 MLX5_SET(modify_nic_vport_context_in, in, opcode, 2924 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); 2925 2926 err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); 2927 2928 kvfree(in); 2929 return err; 2930 } 2931 2932 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, 2933 const struct vdpa_dev_set_config *add_config) 2934 { 2935 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); 2936 struct virtio_net_config *config; 2937 struct mlx5_core_dev *pfmdev; 2938 struct mlx5_vdpa_dev *mvdev; 2939 struct mlx5_vdpa_net *ndev; 2940 struct mlx5_core_dev *mdev; 2941 u32 max_vqs; 2942 u16 mtu; 2943 int err; 2944 2945 if (mgtdev->ndev) 2946 return -ENOSPC; 2947 2948 mdev = mgtdev->madev->mdev; 2949 if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) & 2950 MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) { 2951 dev_warn(mdev->device, "missing support for split virtqueues\n"); 2952 return -EOPNOTSUPP; 2953 } 2954 2955 max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues), 2956 1 << MLX5_CAP_GEN(mdev, log_max_rqt_size)); 2957 if (max_vqs < 2) { 2958 dev_warn(mdev->device, 2959 "%d virtqueues are supported. 
At least 2 are required\n", 2960 max_vqs); 2961 return -EAGAIN; 2962 } 2963 2964 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) { 2965 if (add_config->net.max_vq_pairs > max_vqs / 2) 2966 return -EINVAL; 2967 max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs); 2968 } else { 2969 max_vqs = 2; 2970 } 2971 2972 ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, 2973 1, 1, name, false); 2974 if (IS_ERR(ndev)) 2975 return PTR_ERR(ndev); 2976 2977 ndev->mvdev.mlx_features = mgtdev->mgtdev.supported_features; 2978 ndev->mvdev.max_vqs = max_vqs; 2979 mvdev = &ndev->mvdev; 2980 mvdev->mdev = mdev; 2981 2982 ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL); 2983 ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL); 2984 if (!ndev->vqs || !ndev->event_cbs) { 2985 err = -ENOMEM; 2986 goto err_alloc; 2987 } 2988 2989 init_mvqs(ndev); 2990 init_rwsem(&ndev->reslock); 2991 config = &ndev->config; 2992 2993 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) { 2994 err = config_func_mtu(mdev, add_config->net.mtu); 2995 if (err) 2996 goto err_alloc; 2997 } 2998 2999 err = query_mtu(mdev, &mtu); 3000 if (err) 3001 goto err_alloc; 3002 3003 ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu); 3004 3005 if (get_link_state(mvdev)) 3006 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); 3007 else 3008 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); 3009 3010 if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) { 3011 memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN); 3012 } else { 3013 err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); 3014 if (err) 3015 goto err_alloc; 3016 } 3017 3018 if (!is_zero_ether_addr(config->mac)) { 3019 pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); 3020 err = mlx5_mpfs_add_mac(pfmdev, config->mac); 3021 if (err) 3022 goto err_alloc; 3023 3024 ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC); 3025 } 3026 3027 config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2); 3028 mvdev->vdev.dma_dev = &mdev->pdev->dev; 3029 err = mlx5_vdpa_alloc_resources(&ndev->mvdev); 3030 if (err) 3031 goto err_mpfs; 3032 3033 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 3034 err = mlx5_vdpa_create_mr(mvdev, NULL); 3035 if (err) 3036 goto err_res; 3037 } 3038 3039 err = alloc_resources(ndev); 3040 if (err) 3041 goto err_mr; 3042 3043 ndev->cvq_ent.mvdev = mvdev; 3044 INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler); 3045 mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq"); 3046 if (!mvdev->wq) { 3047 err = -ENOMEM; 3048 goto err_res2; 3049 } 3050 3051 ndev->nb.notifier_call = event_handler; 3052 mlx5_notifier_register(mdev, &ndev->nb); 3053 mvdev->vdev.mdev = &mgtdev->mgtdev; 3054 err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1); 3055 if (err) 3056 goto err_reg; 3057 3058 mgtdev->ndev = ndev; 3059 return 0; 3060 3061 err_reg: 3062 destroy_workqueue(mvdev->wq); 3063 err_res2: 3064 free_resources(ndev); 3065 err_mr: 3066 mlx5_vdpa_destroy_mr(mvdev); 3067 err_res: 3068 mlx5_vdpa_free_resources(&ndev->mvdev); 3069 err_mpfs: 3070 if (!is_zero_ether_addr(config->mac)) 3071 mlx5_mpfs_del_mac(pfmdev, config->mac); 3072 err_alloc: 3073 put_device(&mvdev->vdev.dev); 3074 return err; 3075 } 3076 3077 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev) 3078 { 3079 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); 3080 struct 
mlx5_vdpa_dev *mvdev = to_mvdev(dev); 3081 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3082 struct workqueue_struct *wq; 3083 3084 mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); 3085 wq = mvdev->wq; 3086 mvdev->wq = NULL; 3087 destroy_workqueue(wq); 3088 _vdpa_unregister_device(dev); 3089 mgtdev->ndev = NULL; 3090 } 3091 3092 static const struct vdpa_mgmtdev_ops mdev_ops = { 3093 .dev_add = mlx5_vdpa_dev_add, 3094 .dev_del = mlx5_vdpa_dev_del, 3095 }; 3096 3097 static struct virtio_device_id id_table[] = { 3098 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 3099 { 0 }, 3100 }; 3101 3102 static int mlx5v_probe(struct auxiliary_device *adev, 3103 const struct auxiliary_device_id *id) 3104 3105 { 3106 struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev); 3107 struct mlx5_core_dev *mdev = madev->mdev; 3108 struct mlx5_vdpa_mgmtdev *mgtdev; 3109 int err; 3110 3111 mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL); 3112 if (!mgtdev) 3113 return -ENOMEM; 3114 3115 mgtdev->mgtdev.ops = &mdev_ops; 3116 mgtdev->mgtdev.device = mdev->device; 3117 mgtdev->mgtdev.id_table = id_table; 3118 mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | 3119 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) | 3120 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU); 3121 mgtdev->mgtdev.max_supported_vqs = 3122 MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1; 3123 mgtdev->mgtdev.supported_features = get_supported_features(mdev); 3124 mgtdev->madev = madev; 3125 3126 err = vdpa_mgmtdev_register(&mgtdev->mgtdev); 3127 if (err) 3128 goto reg_err; 3129 3130 auxiliary_set_drvdata(adev, mgtdev); 3131 3132 return 0; 3133 3134 reg_err: 3135 kfree(mgtdev); 3136 return err; 3137 } 3138 3139 static void mlx5v_remove(struct auxiliary_device *adev) 3140 { 3141 struct mlx5_vdpa_mgmtdev *mgtdev; 3142 3143 mgtdev = auxiliary_get_drvdata(adev); 3144 vdpa_mgmtdev_unregister(&mgtdev->mgtdev); 3145 kfree(mgtdev); 3146 } 3147 3148 static const struct auxiliary_device_id mlx5v_id_table[] = { 3149 { .name = MLX5_ADEV_NAME ".vnet", }, 3150 {}, 3151 }; 3152 3153 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table); 3154 3155 static struct auxiliary_driver mlx5v_driver = { 3156 .name = "vnet", 3157 .probe = mlx5v_probe, 3158 .remove = mlx5v_remove, 3159 .id_table = mlx5v_id_table, 3160 }; 3161 3162 module_auxiliary_driver(mlx5v_driver); 3163
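/*
 * Illustrative usage (not part of the driver): once mlx5_core binds the
 * "vnet" auxiliary device handled above, a vDPA net instance can typically
 * be created and destroyed from userspace with the iproute2 "vdpa" tool.
 * The PCI address and values below are arbitrary examples; mac, max_vqp and
 * mtu correspond to the VDPA_ATTR_DEV_NET_CFG_* bits advertised in
 * config_attr_mask in mlx5v_probe().
 *
 *   vdpa mgmtdev show
 *   vdpa dev add name vdpa0 mgmtdev pci/0000:3b:00.2 mac 00:11:22:33:44:55 max_vqp 4 mtu 1500
 *   vdpa dev del vdpa0
 */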