1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */ 3 4 #include <linux/module.h> 5 #include <linux/vdpa.h> 6 #include <linux/vringh.h> 7 #include <uapi/linux/virtio_net.h> 8 #include <uapi/linux/virtio_ids.h> 9 #include <uapi/linux/vdpa.h> 10 #include <linux/virtio_config.h> 11 #include <linux/auxiliary_bus.h> 12 #include <linux/mlx5/cq.h> 13 #include <linux/mlx5/qp.h> 14 #include <linux/mlx5/device.h> 15 #include <linux/mlx5/driver.h> 16 #include <linux/mlx5/vport.h> 17 #include <linux/mlx5/fs.h> 18 #include <linux/mlx5/mlx5_ifc_vdpa.h> 19 #include <linux/mlx5/mpfs.h> 20 #include "mlx5_vdpa.h" 21 #include "mlx5_vnet.h" 22 23 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); 24 MODULE_DESCRIPTION("Mellanox VDPA driver"); 25 MODULE_LICENSE("Dual BSD/GPL"); 26 27 #define VALID_FEATURES_MASK \ 28 (BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \ 29 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \ 30 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \ 31 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \ 32 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \ 33 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \ 34 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \ 35 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \ 36 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \ 37 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \ 38 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \ 39 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \ 40 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV)) 41 42 #define VALID_STATUS_MASK \ 43 (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \ 44 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED) 45 46 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature))) 47 48 #define MLX5V_UNTAGGED 0x1000 49 50 struct mlx5_vdpa_cq_buf { 51 struct mlx5_frag_buf_ctrl fbc; 52 struct mlx5_frag_buf frag_buf; 53 int cqe_size; 54 int nent; 55 }; 56 57 struct mlx5_vdpa_cq { 58 struct mlx5_core_cq mcq; 59 struct mlx5_vdpa_cq_buf buf; 60 struct mlx5_db db; 61 int cqe; 62 }; 63 64 struct mlx5_vdpa_umem { 65 struct mlx5_frag_buf_ctrl fbc; 66 struct mlx5_frag_buf frag_buf; 67 int size; 68 u32 id; 69 }; 70 71 struct mlx5_vdpa_qp { 72 struct mlx5_core_qp mqp; 73 struct mlx5_frag_buf frag_buf; 74 struct mlx5_db db; 75 u16 head; 76 bool fw; 77 }; 78 79 struct mlx5_vq_restore_info { 80 u32 num_ent; 81 u64 desc_addr; 82 u64 device_addr; 83 u64 driver_addr; 84 u16 avail_index; 85 u16 used_index; 86 struct msi_map map; 87 bool ready; 88 bool restore; 89 }; 90 91 struct mlx5_vdpa_virtqueue { 92 bool ready; 93 u64 desc_addr; 94 u64 device_addr; 95 u64 driver_addr; 96 u32 num_ent; 97 98 /* Resources for implementing the notification channel from the device 99 * to the driver. fwqp is the firmware end of an RC connection; the 100 * other end is vqqp used by the driver. cq is where completions are 101 * reported. 
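	 *
	 * In outline: when the device-side virtqueue needs to notify the
	 * driver, firmware sends on fwqp; the message lands in vqqp's
	 * receive queue and produces a CQE on cq. mlx5_vdpa_cq_comp()
	 * consumes the CQEs, re-posts receive entries via rx_post() and
	 * invokes the vdpa event callback so the core can signal the guest.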
 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	u32 counter_set_id;
	bool initialized;
	int index;
	u32 virtq_id;
	struct mlx5_vdpa_net *ndev;
	u16 avail_idx;
	u16 used_idx;
	int fw_state;
	struct msi_map map;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};

static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
			return idx < 2;
		else
			return idx < 3;
	}

	return idx <= mvdev->max_idx;
}

static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);

static bool mlx5_vdpa_debug;

#define MLX5_LOG_VIO_FLAG(_feature)					\
	do {								\
		if (features & BIT_ULL(_feature))			\
			mlx5_vdpa_info(mvdev, "%s\n", #_feature);	\
	} while (0)

#define MLX5_LOG_VIO_STAT(_status)					\
	do {								\
		if (status & (_status))					\
			mlx5_vdpa_info(mvdev, "%s\n", #_status);	\
	} while (0)

/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
	return virtio_legacy_is_little_endian() ||
		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}

static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
		return 2;

	return mvdev->max_vqs;
}

static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	return idx == ctrl_vq_idx(mvdev);
}

static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
	if (status & ~VALID_STATUS_MASK)
		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
			       status & ~VALID_STATUS_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
	if (set && !status) {
		mlx5_vdpa_info(mvdev, "driver resets the device\n");
		return;
	}

	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}

static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
	if (features & ~VALID_FEATURES_MASK)
		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
			       features & ~VALID_FEATURES_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ?
"sets" : "reads"); 222 if (!features) 223 mlx5_vdpa_info(mvdev, "all feature bits are cleared\n"); 224 225 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM); 226 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM); 227 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 228 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU); 229 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC); 230 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4); 231 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6); 232 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN); 233 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO); 234 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4); 235 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6); 236 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN); 237 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO); 238 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF); 239 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS); 240 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ); 241 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX); 242 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN); 243 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA); 244 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE); 245 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ); 246 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR); 247 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT); 248 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS); 249 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT); 250 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY); 251 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX); 252 MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY); 253 MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT); 254 MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1); 255 MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM); 256 MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED); 257 MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM); 258 MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV); 259 } 260 261 static int create_tis(struct mlx5_vdpa_net *ndev) 262 { 263 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 264 u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; 265 void *tisc; 266 int err; 267 268 tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); 269 MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn); 270 err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn); 271 if (err) 272 mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err); 273 274 return err; 275 } 276 277 static void destroy_tis(struct mlx5_vdpa_net *ndev) 278 { 279 mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn); 280 } 281 282 #define MLX5_VDPA_CQE_SIZE 64 283 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE) 284 285 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent) 286 { 287 struct mlx5_frag_buf *frag_buf = &buf->frag_buf; 288 u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE; 289 u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE; 290 int err; 291 292 err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf, 293 ndev->mvdev.mdev->priv.numa_node); 294 if (err) 295 return err; 296 297 mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); 298 299 buf->cqe_size = MLX5_VDPA_CQE_SIZE; 300 buf->nent = nent; 301 302 return 0; 303 } 304 305 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size) 306 { 307 struct mlx5_frag_buf *frag_buf = &umem->frag_buf; 308 309 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf, 310 ndev->mvdev.mdev->priv.numa_node); 311 } 312 313 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf) 314 { 315 mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf); 316 } 317 318 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n) 319 { 320 return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n); 321 } 322 323 static 
void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf) 324 { 325 struct mlx5_cqe64 *cqe64; 326 void *cqe; 327 int i; 328 329 for (i = 0; i < buf->nent; i++) { 330 cqe = get_cqe(vcq, i); 331 cqe64 = cqe; 332 cqe64->op_own = MLX5_CQE_INVALID << 4; 333 } 334 } 335 336 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n) 337 { 338 struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1)); 339 340 if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && 341 !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe))) 342 return cqe64; 343 344 return NULL; 345 } 346 347 static void rx_post(struct mlx5_vdpa_qp *vqp, int n) 348 { 349 vqp->head += n; 350 vqp->db.db[0] = cpu_to_be32(vqp->head); 351 } 352 353 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in, 354 struct mlx5_vdpa_virtqueue *mvq, u32 num_ent) 355 { 356 struct mlx5_vdpa_qp *vqp; 357 __be64 *pas; 358 void *qpc; 359 360 vqp = fw ? &mvq->fwqp : &mvq->vqqp; 361 MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid); 362 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 363 if (vqp->fw) { 364 /* Firmware QP is allocated by the driver for the firmware's 365 * use so we can skip part of the params as they will be chosen by firmware 366 */ 367 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 368 MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ); 369 MLX5_SET(qpc, qpc, no_sq, 1); 370 return; 371 } 372 373 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); 374 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); 375 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); 376 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 377 MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index); 378 MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 379 MLX5_SET(qpc, qpc, no_sq, 1); 380 MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn); 381 MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent)); 382 MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); 383 pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas); 384 mlx5_fill_page_frag_array(&vqp->frag_buf, pas); 385 } 386 387 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent) 388 { 389 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, 390 num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf, 391 ndev->mvdev.mdev->priv.numa_node); 392 } 393 394 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) 395 { 396 mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf); 397 } 398 399 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 400 struct mlx5_vdpa_qp *vqp) 401 { 402 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 403 int inlen = MLX5_ST_SZ_BYTES(create_qp_in); 404 u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; 405 void *qpc; 406 void *in; 407 int err; 408 409 if (!vqp->fw) { 410 vqp = &mvq->vqqp; 411 err = rq_buf_alloc(ndev, vqp, mvq->num_ent); 412 if (err) 413 return err; 414 415 err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db); 416 if (err) 417 goto err_db; 418 inlen += vqp->frag_buf.npages * sizeof(__be64); 419 } 420 421 in = kzalloc(inlen, GFP_KERNEL); 422 if (!in) { 423 err = -ENOMEM; 424 goto err_kzalloc; 425 } 426 427 qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent); 428 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 429 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); 430 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); 431 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); 432 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 433 if (!vqp->fw) 434 MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma); 435 
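	/* For the driver-owned QP the command payload is followed by the PAS
	 * entries describing the receive ring pages; inlen was grown above by
	 * frag_buf.npages * sizeof(__be64) and the array itself is filled in
	 * qp_prepare().
	 */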
	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (err)
		goto err_kzalloc;

	vqp->mqp.uid = ndev->mvdev.res.uid;
	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

	if (!vqp->fw)
		rx_post(vqp, mvq->num_ent);

	return 0;

err_kzalloc:
	if (!vqp->fw)
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
	if (!vqp->fw)
		rq_buf_free(ndev, vqp);

	return err;
}

static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
	if (!vqp->fw) {
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
		rq_buf_free(ndev, vqp);
	}
}

static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	struct mlx5_cqe64 *cqe64;

	cqe64 = next_cqe_sw(vcq);
	if (!cqe64)
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	struct vdpa_callback *event_cb;

	event_cb = &ndev->event_cbs[mvq->index];
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* make sure the CQ consumer update is visible to the hardware before
	 * updating the RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (event_cb->callback)
		event_cb->callback(event_cb->private);
}

static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record. We also let the vdpa
			 * core know about this so it passes it on to the
			 * virtio driver in the guest.
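			 *
			 * Each CQE consumed here corresponds to one
			 * notification sent by the device-side virtqueue over
			 * the fwqp/vqqp pair, so the num_ent / 2 threshold
			 * bounds how much is batched before the doorbell and
			 * callback fire.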
523 */ 524 mlx5_vdpa_handle_completions(mvq, num); 525 num = 0; 526 } 527 } 528 529 if (num) 530 mlx5_vdpa_handle_completions(mvq, num); 531 532 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); 533 } 534 535 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) 536 { 537 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; 538 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 539 void __iomem *uar_page = ndev->mvdev.res.uar->map; 540 u32 out[MLX5_ST_SZ_DW(create_cq_out)]; 541 struct mlx5_vdpa_cq *vcq = &mvq->cq; 542 __be64 *pas; 543 int inlen; 544 void *cqc; 545 void *in; 546 int err; 547 int eqn; 548 549 err = mlx5_db_alloc(mdev, &vcq->db); 550 if (err) 551 return err; 552 553 vcq->mcq.set_ci_db = vcq->db.db; 554 vcq->mcq.arm_db = vcq->db.db + 1; 555 vcq->mcq.cqe_sz = 64; 556 557 err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent); 558 if (err) 559 goto err_db; 560 561 cq_frag_buf_init(vcq, &vcq->buf); 562 563 inlen = MLX5_ST_SZ_BYTES(create_cq_in) + 564 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages; 565 in = kzalloc(inlen, GFP_KERNEL); 566 if (!in) { 567 err = -ENOMEM; 568 goto err_vzalloc; 569 } 570 571 MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid); 572 pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); 573 mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas); 574 575 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 576 MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 577 578 /* Use vector 0 by default. Consider adding code to choose least used 579 * vector. 580 */ 581 err = mlx5_comp_eqn_get(mdev, 0, &eqn); 582 if (err) 583 goto err_vec; 584 585 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 586 MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent)); 587 MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index); 588 MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); 589 MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma); 590 591 err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out)); 592 if (err) 593 goto err_vec; 594 595 vcq->mcq.comp = mlx5_vdpa_cq_comp; 596 vcq->cqe = num_ent; 597 vcq->mcq.set_ci_db = vcq->db.db; 598 vcq->mcq.arm_db = vcq->db.db + 1; 599 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); 600 kfree(in); 601 return 0; 602 603 err_vec: 604 kfree(in); 605 err_vzalloc: 606 cq_frag_buf_free(ndev, &vcq->buf); 607 err_db: 608 mlx5_db_free(ndev->mvdev.mdev, &vcq->db); 609 return err; 610 } 611 612 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx) 613 { 614 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; 615 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 616 struct mlx5_vdpa_cq *vcq = &mvq->cq; 617 618 if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) { 619 mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn); 620 return; 621 } 622 cq_frag_buf_free(ndev, &vcq->buf); 623 mlx5_db_free(ndev->mvdev.mdev, &vcq->db); 624 } 625 626 static int read_umem_params(struct mlx5_vdpa_net *ndev) 627 { 628 u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; 629 u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01); 630 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 631 int out_size; 632 void *caps; 633 void *out; 634 int err; 635 636 out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); 637 out = kzalloc(out_size, GFP_KERNEL); 638 if (!out) 639 return -ENOMEM; 640 641 MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); 642 MLX5_SET(query_hca_cap_in, in, op_mod, opmod); 643 err = mlx5_cmd_exec_inout(mdev, 
query_hca_cap, in, out); 644 if (err) { 645 mlx5_vdpa_warn(&ndev->mvdev, 646 "Failed reading vdpa umem capabilities with err %d\n", err); 647 goto out; 648 } 649 650 caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability); 651 652 ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a); 653 ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b); 654 655 ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a); 656 ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b); 657 658 ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a); 659 ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b); 660 661 out: 662 kfree(out); 663 return 0; 664 } 665 666 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num, 667 struct mlx5_vdpa_umem **umemp) 668 { 669 u32 p_a; 670 u32 p_b; 671 672 switch (num) { 673 case 1: 674 p_a = ndev->umem_1_buffer_param_a; 675 p_b = ndev->umem_1_buffer_param_b; 676 *umemp = &mvq->umem1; 677 break; 678 case 2: 679 p_a = ndev->umem_2_buffer_param_a; 680 p_b = ndev->umem_2_buffer_param_b; 681 *umemp = &mvq->umem2; 682 break; 683 case 3: 684 p_a = ndev->umem_3_buffer_param_a; 685 p_b = ndev->umem_3_buffer_param_b; 686 *umemp = &mvq->umem3; 687 break; 688 } 689 690 (*umemp)->size = p_a * mvq->num_ent + p_b; 691 } 692 693 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem) 694 { 695 mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf); 696 } 697 698 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) 699 { 700 int inlen; 701 u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {}; 702 void *um; 703 void *in; 704 int err; 705 __be64 *pas; 706 struct mlx5_vdpa_umem *umem; 707 708 set_umem_size(ndev, mvq, num, &umem); 709 err = umem_frag_buf_alloc(ndev, umem, umem->size); 710 if (err) 711 return err; 712 713 inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages; 714 715 in = kzalloc(inlen, GFP_KERNEL); 716 if (!in) { 717 err = -ENOMEM; 718 goto err_in; 719 } 720 721 MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM); 722 MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid); 723 um = MLX5_ADDR_OF(create_umem_in, in, umem); 724 MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 725 MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages); 726 727 pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]); 728 mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW); 729 730 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 731 if (err) { 732 mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err); 733 goto err_cmd; 734 } 735 736 kfree(in); 737 umem->id = MLX5_GET(create_umem_out, out, umem_id); 738 739 return 0; 740 741 err_cmd: 742 kfree(in); 743 err_in: 744 umem_frag_buf_free(ndev, umem); 745 return err; 746 } 747 748 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) 749 { 750 u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {}; 751 u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {}; 752 struct mlx5_vdpa_umem *umem; 753 754 switch (num) { 755 case 1: 756 umem = &mvq->umem1; 757 break; 758 case 2: 759 umem = &mvq->umem2; 760 break; 761 case 3: 762 umem = &mvq->umem3; 763 break; 764 } 765 766 MLX5_SET(destroy_umem_in, in, opcode, 
MLX5_CMD_OP_DESTROY_UMEM); 767 MLX5_SET(destroy_umem_in, in, umem_id, umem->id); 768 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) 769 return; 770 771 umem_frag_buf_free(ndev, umem); 772 } 773 774 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 775 { 776 int num; 777 int err; 778 779 for (num = 1; num <= 3; num++) { 780 err = create_umem(ndev, mvq, num); 781 if (err) 782 goto err_umem; 783 } 784 return 0; 785 786 err_umem: 787 for (num--; num > 0; num--) 788 umem_destroy(ndev, mvq, num); 789 790 return err; 791 } 792 793 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 794 { 795 int num; 796 797 for (num = 3; num > 0; num--) 798 umem_destroy(ndev, mvq, num); 799 } 800 801 static int get_queue_type(struct mlx5_vdpa_net *ndev) 802 { 803 u32 type_mask; 804 805 type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type); 806 807 /* prefer split queue */ 808 if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT) 809 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT; 810 811 WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED)); 812 813 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED; 814 } 815 816 static bool vq_is_tx(u16 idx) 817 { 818 return idx % 2; 819 } 820 821 enum { 822 MLX5_VIRTIO_NET_F_MRG_RXBUF = 2, 823 MLX5_VIRTIO_NET_F_HOST_ECN = 4, 824 MLX5_VIRTIO_NET_F_GUEST_ECN = 6, 825 MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7, 826 MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8, 827 MLX5_VIRTIO_NET_F_GUEST_CSUM = 9, 828 MLX5_VIRTIO_NET_F_CSUM = 10, 829 MLX5_VIRTIO_NET_F_HOST_TSO6 = 11, 830 MLX5_VIRTIO_NET_F_HOST_TSO4 = 12, 831 }; 832 833 static u16 get_features(u64 features) 834 { 835 return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) | 836 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) | 837 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) | 838 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) | 839 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) | 840 (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) | 841 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) | 842 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4); 843 } 844 845 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev) 846 { 847 return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) & 848 BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 849 } 850 851 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev) 852 { 853 return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) & 854 (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) && 855 pci_msix_can_alloc_dyn(mvdev->mdev->pdev); 856 } 857 858 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 859 { 860 int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); 861 u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {}; 862 void *obj_context; 863 u16 mlx_features; 864 void *cmd_hdr; 865 void *vq_ctx; 866 void *in; 867 int err; 868 869 err = umems_create(ndev, mvq); 870 if (err) 871 return err; 872 873 in = kzalloc(inlen, GFP_KERNEL); 874 if (!in) { 875 err = -ENOMEM; 876 goto err_alloc; 877 } 878 879 mlx_features = get_features(ndev->mvdev.actual_features); 880 cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr); 881 882 MLX5_SET(general_obj_in_cmd_hdr, 
cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 883 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 884 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 885 886 obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context); 887 MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); 888 MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); 889 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, 890 mlx_features >> 3); 891 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0, 892 mlx_features & 7); 893 vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); 894 MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev)); 895 896 if (vq_is_tx(mvq->index)) 897 MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn); 898 899 if (mvq->map.virq) { 900 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE); 901 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index); 902 } else { 903 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE); 904 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); 905 } 906 907 MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index); 908 MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent); 909 MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 910 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1))); 911 MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); 912 MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); 913 MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); 914 MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey); 915 MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id); 916 MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size); 917 MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id); 918 MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size); 919 MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id); 920 MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size); 921 MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); 922 if (counters_supported(&ndev->mvdev)) 923 MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id); 924 925 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 926 if (err) 927 goto err_cmd; 928 929 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT; 930 kfree(in); 931 mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 932 933 return 0; 934 935 err_cmd: 936 kfree(in); 937 err_alloc: 938 umems_destroy(ndev, mvq); 939 return err; 940 } 941 942 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 943 { 944 u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {}; 945 u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {}; 946 947 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode, 948 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 949 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id); 950 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid); 951 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type, 952 MLX5_OBJ_TYPE_VIRTIO_NET_Q); 953 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) { 954 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id); 955 return; 956 } 957 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; 958 umems_destroy(ndev, mvq); 959 } 960 961 static u32 
get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) 962 { 963 return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn; 964 } 965 966 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) 967 { 968 return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn; 969 } 970 971 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out, 972 int *outlen, u32 qpn, u32 rqpn) 973 { 974 void *qpc; 975 void *pp; 976 977 switch (cmd) { 978 case MLX5_CMD_OP_2RST_QP: 979 *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in); 980 *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out); 981 *in = kzalloc(*inlen, GFP_KERNEL); 982 *out = kzalloc(*outlen, GFP_KERNEL); 983 if (!*in || !*out) 984 goto outerr; 985 986 MLX5_SET(qp_2rst_in, *in, opcode, cmd); 987 MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid); 988 MLX5_SET(qp_2rst_in, *in, qpn, qpn); 989 break; 990 case MLX5_CMD_OP_RST2INIT_QP: 991 *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in); 992 *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out); 993 *in = kzalloc(*inlen, GFP_KERNEL); 994 *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL); 995 if (!*in || !*out) 996 goto outerr; 997 998 MLX5_SET(rst2init_qp_in, *in, opcode, cmd); 999 MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid); 1000 MLX5_SET(rst2init_qp_in, *in, qpn, qpn); 1001 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 1002 MLX5_SET(qpc, qpc, remote_qpn, rqpn); 1003 MLX5_SET(qpc, qpc, rwe, 1); 1004 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1005 MLX5_SET(ads, pp, vhca_port_num, 1); 1006 break; 1007 case MLX5_CMD_OP_INIT2RTR_QP: 1008 *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in); 1009 *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out); 1010 *in = kzalloc(*inlen, GFP_KERNEL); 1011 *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL); 1012 if (!*in || !*out) 1013 goto outerr; 1014 1015 MLX5_SET(init2rtr_qp_in, *in, opcode, cmd); 1016 MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid); 1017 MLX5_SET(init2rtr_qp_in, *in, qpn, qpn); 1018 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 1019 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 1020 MLX5_SET(qpc, qpc, log_msg_max, 30); 1021 MLX5_SET(qpc, qpc, remote_qpn, rqpn); 1022 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1023 MLX5_SET(ads, pp, fl, 1); 1024 break; 1025 case MLX5_CMD_OP_RTR2RTS_QP: 1026 *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in); 1027 *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out); 1028 *in = kzalloc(*inlen, GFP_KERNEL); 1029 *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL); 1030 if (!*in || !*out) 1031 goto outerr; 1032 1033 MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd); 1034 MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid); 1035 MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn); 1036 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 1037 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1038 MLX5_SET(ads, pp, ack_timeout, 14); 1039 MLX5_SET(qpc, qpc, retry_count, 7); 1040 MLX5_SET(qpc, qpc, rnr_retry, 7); 1041 break; 1042 default: 1043 goto outerr_nullify; 1044 } 1045 1046 return; 1047 1048 outerr: 1049 kfree(*in); 1050 kfree(*out); 1051 outerr_nullify: 1052 *in = NULL; 1053 *out = NULL; 1054 } 1055 1056 static void free_inout(void *in, void *out) 1057 { 1058 kfree(in); 1059 kfree(out); 1060 } 1061 1062 /* Two QPs are used by each virtqueue. One is used by the driver and one by 1063 * firmware. The fw argument indicates whether the subjected QP is the one used 1064 * by firmware. 
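 * connect_qps() below drives both QPs through reset, INIT and RTR; only the
 * firmware QP is moved all the way to RTS, since only the device side ever
 * posts sends on this loopback connection.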
1065 */ 1066 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd) 1067 { 1068 int outlen; 1069 int inlen; 1070 void *out; 1071 void *in; 1072 int err; 1073 1074 alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw)); 1075 if (!in || !out) 1076 return -ENOMEM; 1077 1078 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen); 1079 free_inout(in, out); 1080 return err; 1081 } 1082 1083 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1084 { 1085 int err; 1086 1087 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP); 1088 if (err) 1089 return err; 1090 1091 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP); 1092 if (err) 1093 return err; 1094 1095 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP); 1096 if (err) 1097 return err; 1098 1099 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP); 1100 if (err) 1101 return err; 1102 1103 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP); 1104 if (err) 1105 return err; 1106 1107 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP); 1108 if (err) 1109 return err; 1110 1111 return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP); 1112 } 1113 1114 struct mlx5_virtq_attr { 1115 u8 state; 1116 u16 available_index; 1117 u16 used_index; 1118 }; 1119 1120 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 1121 struct mlx5_virtq_attr *attr) 1122 { 1123 int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out); 1124 u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {}; 1125 void *out; 1126 void *obj_context; 1127 void *cmd_hdr; 1128 int err; 1129 1130 out = kzalloc(outlen, GFP_KERNEL); 1131 if (!out) 1132 return -ENOMEM; 1133 1134 cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr); 1135 1136 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 1137 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 1138 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); 1139 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1140 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen); 1141 if (err) 1142 goto err_cmd; 1143 1144 obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context); 1145 memset(attr, 0, sizeof(*attr)); 1146 attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); 1147 attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); 1148 attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index); 1149 kfree(out); 1150 return 0; 1151 1152 err_cmd: 1153 kfree(out); 1154 return err; 1155 } 1156 1157 static bool is_valid_state_change(int oldstate, int newstate) 1158 { 1159 switch (oldstate) { 1160 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT: 1161 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY; 1162 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY: 1163 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; 1164 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: 1165 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR: 1166 default: 1167 return false; 1168 } 1169 } 1170 1171 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state) 1172 { 1173 int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); 1174 u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {}; 1175 void *obj_context; 1176 void *cmd_hdr; 1177 void *in; 1178 int err; 1179 1180 if (mvq->fw_state == 
MLX5_VIRTIO_NET_Q_OBJECT_NONE) 1181 return 0; 1182 1183 if (!is_valid_state_change(mvq->fw_state, state)) 1184 return -EINVAL; 1185 1186 in = kzalloc(inlen, GFP_KERNEL); 1187 if (!in) 1188 return -ENOMEM; 1189 1190 cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr); 1191 1192 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); 1193 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 1194 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); 1195 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1196 1197 obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context); 1198 MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, 1199 MLX5_VIRTQ_MODIFY_MASK_STATE); 1200 MLX5_SET(virtio_net_q_object, obj_context, state, state); 1201 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 1202 kfree(in); 1203 if (!err) 1204 mvq->fw_state = state; 1205 1206 return err; 1207 } 1208 1209 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1210 { 1211 u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {}; 1212 u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {}; 1213 void *cmd_hdr; 1214 int err; 1215 1216 if (!counters_supported(&ndev->mvdev)) 1217 return 0; 1218 1219 cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr); 1220 1221 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 1222 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 1223 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1224 1225 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 1226 if (err) 1227 return err; 1228 1229 mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 1230 1231 return 0; 1232 } 1233 1234 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1235 { 1236 u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {}; 1237 u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {}; 1238 1239 if (!counters_supported(&ndev->mvdev)) 1240 return; 1241 1242 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 1243 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id); 1244 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid); 1245 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 1246 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) 1247 mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id); 1248 } 1249 1250 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv) 1251 { 1252 struct vdpa_callback *cb = priv; 1253 1254 if (cb->callback) 1255 return cb->callback(cb->private); 1256 1257 return IRQ_HANDLED; 1258 } 1259 1260 static void alloc_vector(struct mlx5_vdpa_net *ndev, 1261 struct mlx5_vdpa_virtqueue *mvq) 1262 { 1263 struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; 1264 struct mlx5_vdpa_irq_pool_entry *ent; 1265 int err; 1266 int i; 1267 1268 for (i = 0; i < irqp->num_ent; i++) { 1269 ent = &irqp->entries[i]; 1270 if (!ent->used) { 1271 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", 1272 dev_name(&ndev->mvdev.vdev.dev), mvq->index); 1273 ent->dev_id = &ndev->event_cbs[mvq->index]; 1274 err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0, 1275 ent->name, ent->dev_id); 1276 if 
(err) 1277 return; 1278 1279 ent->used = true; 1280 mvq->map = ent->map; 1281 return; 1282 } 1283 } 1284 } 1285 1286 static void dealloc_vector(struct mlx5_vdpa_net *ndev, 1287 struct mlx5_vdpa_virtqueue *mvq) 1288 { 1289 struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; 1290 int i; 1291 1292 for (i = 0; i < irqp->num_ent; i++) 1293 if (mvq->map.virq == irqp->entries[i].map.virq) { 1294 free_irq(mvq->map.virq, irqp->entries[i].dev_id); 1295 irqp->entries[i].used = false; 1296 return; 1297 } 1298 } 1299 1300 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1301 { 1302 u16 idx = mvq->index; 1303 int err; 1304 1305 if (!mvq->num_ent) 1306 return 0; 1307 1308 if (mvq->initialized) 1309 return 0; 1310 1311 err = cq_create(ndev, idx, mvq->num_ent); 1312 if (err) 1313 return err; 1314 1315 err = qp_create(ndev, mvq, &mvq->fwqp); 1316 if (err) 1317 goto err_fwqp; 1318 1319 err = qp_create(ndev, mvq, &mvq->vqqp); 1320 if (err) 1321 goto err_vqqp; 1322 1323 err = connect_qps(ndev, mvq); 1324 if (err) 1325 goto err_connect; 1326 1327 err = counter_set_alloc(ndev, mvq); 1328 if (err) 1329 goto err_connect; 1330 1331 alloc_vector(ndev, mvq); 1332 err = create_virtqueue(ndev, mvq); 1333 if (err) 1334 goto err_vq; 1335 1336 if (mvq->ready) { 1337 err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); 1338 if (err) { 1339 mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n", 1340 idx, err); 1341 goto err_modify; 1342 } 1343 } 1344 1345 mvq->initialized = true; 1346 return 0; 1347 1348 err_modify: 1349 destroy_virtqueue(ndev, mvq); 1350 err_vq: 1351 dealloc_vector(ndev, mvq); 1352 counter_set_dealloc(ndev, mvq); 1353 err_connect: 1354 qp_destroy(ndev, &mvq->vqqp); 1355 err_vqqp: 1356 qp_destroy(ndev, &mvq->fwqp); 1357 err_fwqp: 1358 cq_destroy(ndev, idx); 1359 return err; 1360 } 1361 1362 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1363 { 1364 struct mlx5_virtq_attr attr; 1365 1366 if (!mvq->initialized) 1367 return; 1368 1369 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 1370 return; 1371 1372 if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) 1373 mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n"); 1374 1375 if (query_virtqueue(ndev, mvq, &attr)) { 1376 mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n"); 1377 return; 1378 } 1379 mvq->avail_idx = attr.available_index; 1380 mvq->used_idx = attr.used_index; 1381 } 1382 1383 static void suspend_vqs(struct mlx5_vdpa_net *ndev) 1384 { 1385 int i; 1386 1387 for (i = 0; i < ndev->mvdev.max_vqs; i++) 1388 suspend_vq(ndev, &ndev->vqs[i]); 1389 } 1390 1391 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1392 { 1393 if (!mvq->initialized) 1394 return; 1395 1396 suspend_vq(ndev, mvq); 1397 destroy_virtqueue(ndev, mvq); 1398 dealloc_vector(ndev, mvq); 1399 counter_set_dealloc(ndev, mvq); 1400 qp_destroy(ndev, &mvq->vqqp); 1401 qp_destroy(ndev, &mvq->fwqp); 1402 cq_destroy(ndev, mvq->index); 1403 mvq->initialized = false; 1404 } 1405 1406 static int create_rqt(struct mlx5_vdpa_net *ndev) 1407 { 1408 int rqt_table_size = roundup_pow_of_two(ndev->rqt_size); 1409 int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2); 1410 __be32 *list; 1411 void *rqtc; 1412 int inlen; 1413 void *in; 1414 int i, j; 1415 int err; 1416 1417 inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num); 1418 in = kzalloc(inlen, GFP_KERNEL); 1419 if (!in) 1420 return -ENOMEM; 1421 
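	/* The RQT lists only receive virtqueues: data VQs come in (rx, tx)
	 * pairs, so the even indices 0, 2, 4, ... are the RX queues that
	 * serve as RSS targets.
	 */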
1422 MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid); 1423 rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); 1424 1425 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); 1426 MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size); 1427 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); 1428 for (i = 0, j = 0; i < act_sz; i++, j += 2) 1429 list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id); 1430 1431 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz); 1432 err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn); 1433 kfree(in); 1434 if (err) 1435 return err; 1436 1437 return 0; 1438 } 1439 1440 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1) 1441 1442 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num) 1443 { 1444 int act_sz = roundup_pow_of_two(num / 2); 1445 __be32 *list; 1446 void *rqtc; 1447 int inlen; 1448 void *in; 1449 int i, j; 1450 int err; 1451 1452 inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num); 1453 in = kzalloc(inlen, GFP_KERNEL); 1454 if (!in) 1455 return -ENOMEM; 1456 1457 MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid); 1458 MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS); 1459 rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); 1460 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); 1461 1462 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); 1463 for (i = 0, j = 0; i < act_sz; i++, j = j + 2) 1464 list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id); 1465 1466 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz); 1467 err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn); 1468 kfree(in); 1469 if (err) 1470 return err; 1471 1472 return 0; 1473 } 1474 1475 static void destroy_rqt(struct mlx5_vdpa_net *ndev) 1476 { 1477 mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn); 1478 } 1479 1480 static int create_tir(struct mlx5_vdpa_net *ndev) 1481 { 1482 #define HASH_IP_L4PORTS \ 1483 (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \ 1484 MLX5_HASH_FIELD_SEL_L4_DPORT) 1485 static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7, 1486 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94, 1487 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1, 1488 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59, 1489 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a }; 1490 void *rss_key; 1491 void *outer; 1492 void *tirc; 1493 void *in; 1494 int err; 1495 1496 in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL); 1497 if (!in) 1498 return -ENOMEM; 1499 1500 MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid); 1501 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); 1502 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); 1503 1504 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); 1505 MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); 1506 rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); 1507 memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key)); 1508 1509 outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); 1510 MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); 1511 MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); 1512 MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS); 1513 1514 MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn); 1515 MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn); 1516 1517 err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn); 1518 kfree(in); 1519 if (err) 1520 return err; 1521 
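	/* At this point the TIR hashes outer IPv4/TCP headers with the fixed
	 * Toeplitz key above and spreads matching flows across the RQT
	 * created earlier.
	 */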
1522 mlx5_vdpa_add_tirn(ndev); 1523 return err; 1524 } 1525 1526 static void destroy_tir(struct mlx5_vdpa_net *ndev) 1527 { 1528 mlx5_vdpa_remove_tirn(ndev); 1529 mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn); 1530 } 1531 1532 #define MAX_STEERING_ENT 0x8000 1533 #define MAX_STEERING_GROUPS 2 1534 1535 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1536 #define NUM_DESTS 2 1537 #else 1538 #define NUM_DESTS 1 1539 #endif 1540 1541 static int add_steering_counters(struct mlx5_vdpa_net *ndev, 1542 struct macvlan_node *node, 1543 struct mlx5_flow_act *flow_act, 1544 struct mlx5_flow_destination *dests) 1545 { 1546 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1547 int err; 1548 1549 node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false); 1550 if (IS_ERR(node->ucast_counter.counter)) 1551 return PTR_ERR(node->ucast_counter.counter); 1552 1553 node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false); 1554 if (IS_ERR(node->mcast_counter.counter)) { 1555 err = PTR_ERR(node->mcast_counter.counter); 1556 goto err_mcast_counter; 1557 } 1558 1559 dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; 1560 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 1561 return 0; 1562 1563 err_mcast_counter: 1564 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter); 1565 return err; 1566 #else 1567 return 0; 1568 #endif 1569 } 1570 1571 static void remove_steering_counters(struct mlx5_vdpa_net *ndev, 1572 struct macvlan_node *node) 1573 { 1574 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1575 mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter); 1576 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter); 1577 #endif 1578 } 1579 1580 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, 1581 struct macvlan_node *node) 1582 { 1583 struct mlx5_flow_destination dests[NUM_DESTS] = {}; 1584 struct mlx5_flow_act flow_act = {}; 1585 struct mlx5_flow_spec *spec; 1586 void *headers_c; 1587 void *headers_v; 1588 u8 *dmac_c; 1589 u8 *dmac_v; 1590 int err; 1591 u16 vid; 1592 1593 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 1594 if (!spec) 1595 return -ENOMEM; 1596 1597 vid = key2vid(node->macvlan); 1598 spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; 1599 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); 1600 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); 1601 dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); 1602 dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); 1603 eth_broadcast_addr(dmac_c); 1604 ether_addr_copy(dmac_v, mac); 1605 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) { 1606 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); 1607 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); 1608 } 1609 if (node->tagged) { 1610 MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); 1611 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid); 1612 } 1613 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 1614 dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; 1615 dests[0].tir_num = ndev->res.tirn; 1616 err = add_steering_counters(ndev, node, &flow_act, dests); 1617 if (err) 1618 goto out_free; 1619 1620 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1621 dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter); 1622 #endif 1623 node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); 1624 if (IS_ERR(node->ucast_rule)) { 1625 err = 
PTR_ERR(node->ucast_rule); 1626 goto err_ucast; 1627 } 1628 1629 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1630 dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter); 1631 #endif 1632 1633 memset(dmac_c, 0, ETH_ALEN); 1634 memset(dmac_v, 0, ETH_ALEN); 1635 dmac_c[0] = 1; 1636 dmac_v[0] = 1; 1637 node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); 1638 if (IS_ERR(node->mcast_rule)) { 1639 err = PTR_ERR(node->mcast_rule); 1640 goto err_mcast; 1641 } 1642 kvfree(spec); 1643 mlx5_vdpa_add_rx_counters(ndev, node); 1644 return 0; 1645 1646 err_mcast: 1647 mlx5_del_flow_rules(node->ucast_rule); 1648 err_ucast: 1649 remove_steering_counters(ndev, node); 1650 out_free: 1651 kvfree(spec); 1652 return err; 1653 } 1654 1655 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev, 1656 struct macvlan_node *node) 1657 { 1658 mlx5_vdpa_remove_rx_counters(ndev, node); 1659 mlx5_del_flow_rules(node->ucast_rule); 1660 mlx5_del_flow_rules(node->mcast_rule); 1661 } 1662 1663 static u64 search_val(u8 *mac, u16 vlan, bool tagged) 1664 { 1665 u64 val; 1666 1667 if (!tagged) 1668 vlan = MLX5V_UNTAGGED; 1669 1670 val = (u64)vlan << 48 | 1671 (u64)mac[0] << 40 | 1672 (u64)mac[1] << 32 | 1673 (u64)mac[2] << 24 | 1674 (u64)mac[3] << 16 | 1675 (u64)mac[4] << 8 | 1676 (u64)mac[5]; 1677 1678 return val; 1679 } 1680 1681 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value) 1682 { 1683 struct macvlan_node *pos; 1684 u32 idx; 1685 1686 idx = hash_64(value, 8); // tbd 8 1687 hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) { 1688 if (pos->macvlan == value) 1689 return pos; 1690 } 1691 return NULL; 1692 } 1693 1694 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged) 1695 { 1696 struct macvlan_node *ptr; 1697 u64 val; 1698 u32 idx; 1699 int err; 1700 1701 val = search_val(mac, vid, tagged); 1702 if (mac_vlan_lookup(ndev, val)) 1703 return -EEXIST; 1704 1705 ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); 1706 if (!ptr) 1707 return -ENOMEM; 1708 1709 ptr->tagged = tagged; 1710 ptr->macvlan = val; 1711 ptr->ndev = ndev; 1712 err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr); 1713 if (err) 1714 goto err_add; 1715 1716 idx = hash_64(val, 8); 1717 hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]); 1718 return 0; 1719 1720 err_add: 1721 kfree(ptr); 1722 return err; 1723 } 1724 1725 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) 1726 { 1727 struct macvlan_node *ptr; 1728 1729 ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged)); 1730 if (!ptr) 1731 return; 1732 1733 hlist_del(&ptr->hlist); 1734 mlx5_vdpa_del_mac_vlan_rules(ndev, ptr); 1735 remove_steering_counters(ndev, ptr); 1736 kfree(ptr); 1737 } 1738 1739 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev) 1740 { 1741 struct macvlan_node *pos; 1742 struct hlist_node *n; 1743 int i; 1744 1745 for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) { 1746 hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) { 1747 hlist_del(&pos->hlist); 1748 mlx5_vdpa_del_mac_vlan_rules(ndev, pos); 1749 remove_steering_counters(ndev, pos); 1750 kfree(pos); 1751 } 1752 } 1753 } 1754 1755 static int setup_steering(struct mlx5_vdpa_net *ndev) 1756 { 1757 struct mlx5_flow_table_attr ft_attr = {}; 1758 struct mlx5_flow_namespace *ns; 1759 int err; 1760 1761 ft_attr.max_fte = MAX_STEERING_ENT; 1762 ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS; 1763 1764 ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, 
					   MLX5_FLOW_NAMESPACE_BYPASS);
	if (!ns) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
		return -EOPNOTSUPP;
	}

	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(ndev->rxft)) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
		return PTR_ERR(ndev->rxft);
	}
	mlx5_vdpa_add_rx_flow_table(ndev);

	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
	if (err)
		goto err_add;

	return 0;

err_add:
	mlx5_vdpa_remove_rx_flow_table(ndev);
	mlx5_destroy_flow_table(ndev->rxft);
	return err;
}

static void teardown_steering(struct mlx5_vdpa_net *ndev)
{
	clear_mac_vlan_table(ndev);
	mlx5_vdpa_remove_rx_flow_table(ndev);
	mlx5_destroy_flow_table(ndev->rxft);
}

static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct mlx5_core_dev *pfmdev;
	size_t read;
	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];

	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
	switch (cmd) {
	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
		if (read != ETH_ALEN)
			break;

		if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
			status = VIRTIO_NET_OK;
			break;
		}

		if (is_zero_ether_addr(mac))
			break;

		if (!is_zero_ether_addr(ndev->config.mac)) {
			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
					       ndev->config.mac);
				break;
			}
		}

		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
				       mac);
			break;
		}

		/* Back up the original MAC address so that we can restore it
		 * if adding the forward rules fails.
		 */
		memcpy(mac_back, ndev->config.mac, ETH_ALEN);

		memcpy(ndev->config.mac, mac, ETH_ALEN);

		/* The flow table entry must be recreated so that packets for
		 * the new MAC are forwarded back to this device.
		 */
		mac_vlan_del(ndev, mac_back, 0, false);

		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");

			/* Although this path is hardly ever taken, we still
			 * need to double check.
			 */
			if (is_zero_ether_addr(mac_back)) {
				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
				break;
			}

			/* Try to restore the original MAC address to the MPFS
			 * table, and try to restore the forward rule entry.
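			 *
			 * Both tables must stay in sync with config.mac: MPFS
			 * admits the MAC at the physical port, while the
			 * steering rules forward the matching frames to this
			 * device's TIR.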
1856 */ 1857 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) { 1858 mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n", 1859 ndev->config.mac); 1860 } 1861 1862 if (mlx5_mpfs_add_mac(pfmdev, mac_back)) { 1863 mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n", 1864 mac_back); 1865 } 1866 1867 memcpy(ndev->config.mac, mac_back, ETH_ALEN); 1868 1869 if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) 1870 mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n"); 1871 1872 break; 1873 } 1874 1875 status = VIRTIO_NET_OK; 1876 break; 1877 1878 default: 1879 break; 1880 } 1881 1882 return status; 1883 } 1884 1885 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps) 1886 { 1887 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1888 int cur_qps = ndev->cur_num_vqs / 2; 1889 int err; 1890 int i; 1891 1892 if (cur_qps > newqps) { 1893 err = modify_rqt(ndev, 2 * newqps); 1894 if (err) 1895 return err; 1896 1897 for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--) 1898 teardown_vq(ndev, &ndev->vqs[i]); 1899 1900 ndev->cur_num_vqs = 2 * newqps; 1901 } else { 1902 ndev->cur_num_vqs = 2 * newqps; 1903 for (i = cur_qps * 2; i < 2 * newqps; i++) { 1904 err = setup_vq(ndev, &ndev->vqs[i]); 1905 if (err) 1906 goto clean_added; 1907 } 1908 err = modify_rqt(ndev, 2 * newqps); 1909 if (err) 1910 goto clean_added; 1911 } 1912 return 0; 1913 1914 clean_added: 1915 for (--i; i >= 2 * cur_qps; --i) 1916 teardown_vq(ndev, &ndev->vqs[i]); 1917 1918 ndev->cur_num_vqs = 2 * cur_qps; 1919 1920 return err; 1921 } 1922 1923 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd) 1924 { 1925 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1926 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1927 struct mlx5_control_vq *cvq = &mvdev->cvq; 1928 struct virtio_net_ctrl_mq mq; 1929 size_t read; 1930 u16 newqps; 1931 1932 switch (cmd) { 1933 case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: 1934 /* This mq feature check aligns with pre-existing userspace 1935 * implementation. 1936 * 1937 * Without it, an untrusted driver could fake a multiqueue config 1938 * request down to a non-mq device that may cause kernel to 1939 * panic due to uninitialized resources for extra vqs. Even with 1940 * a well behaving guest driver, it is not expected to allow 1941 * changing the number of vqs on a non-mq device. 
1942 */ 1943 if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) 1944 break; 1945 1946 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq)); 1947 if (read != sizeof(mq)) 1948 break; 1949 1950 newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs); 1951 if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 1952 newqps > ndev->rqt_size) 1953 break; 1954 1955 if (ndev->cur_num_vqs == 2 * newqps) { 1956 status = VIRTIO_NET_OK; 1957 break; 1958 } 1959 1960 if (!change_num_qps(mvdev, newqps)) 1961 status = VIRTIO_NET_OK; 1962 1963 break; 1964 default: 1965 break; 1966 } 1967 1968 return status; 1969 } 1970 1971 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd) 1972 { 1973 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1974 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1975 struct mlx5_control_vq *cvq = &mvdev->cvq; 1976 __virtio16 vlan; 1977 size_t read; 1978 u16 id; 1979 1980 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN))) 1981 return status; 1982 1983 switch (cmd) { 1984 case VIRTIO_NET_CTRL_VLAN_ADD: 1985 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 1986 if (read != sizeof(vlan)) 1987 break; 1988 1989 id = mlx5vdpa16_to_cpu(mvdev, vlan); 1990 if (mac_vlan_add(ndev, ndev->config.mac, id, true)) 1991 break; 1992 1993 status = VIRTIO_NET_OK; 1994 break; 1995 case VIRTIO_NET_CTRL_VLAN_DEL: 1996 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 1997 if (read != sizeof(vlan)) 1998 break; 1999 2000 id = mlx5vdpa16_to_cpu(mvdev, vlan); 2001 mac_vlan_del(ndev, ndev->config.mac, id, true); 2002 status = VIRTIO_NET_OK; 2003 break; 2004 default: 2005 break; 2006 } 2007 2008 return status; 2009 } 2010 2011 static void mlx5_cvq_kick_handler(struct work_struct *work) 2012 { 2013 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 2014 struct virtio_net_ctrl_hdr ctrl; 2015 struct mlx5_vdpa_wq_ent *wqent; 2016 struct mlx5_vdpa_dev *mvdev; 2017 struct mlx5_control_vq *cvq; 2018 struct mlx5_vdpa_net *ndev; 2019 size_t read, write; 2020 int err; 2021 2022 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 2023 mvdev = wqent->mvdev; 2024 ndev = to_mlx5_vdpa_ndev(mvdev); 2025 cvq = &mvdev->cvq; 2026 2027 down_write(&ndev->reslock); 2028 2029 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 2030 goto out; 2031 2032 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 2033 goto out; 2034 2035 if (!cvq->ready) 2036 goto out; 2037 2038 while (true) { 2039 err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head, 2040 GFP_ATOMIC); 2041 if (err <= 0) 2042 break; 2043 2044 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl)); 2045 if (read != sizeof(ctrl)) 2046 break; 2047 2048 cvq->received_desc++; 2049 switch (ctrl.class) { 2050 case VIRTIO_NET_CTRL_MAC: 2051 status = handle_ctrl_mac(mvdev, ctrl.cmd); 2052 break; 2053 case VIRTIO_NET_CTRL_MQ: 2054 status = handle_ctrl_mq(mvdev, ctrl.cmd); 2055 break; 2056 case VIRTIO_NET_CTRL_VLAN: 2057 status = handle_ctrl_vlan(mvdev, ctrl.cmd); 2058 break; 2059 default: 2060 break; 2061 } 2062 2063 /* Make sure data is written before advancing index */ 2064 smp_wmb(); 2065 2066 write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status)); 2067 vringh_complete_iotlb(&cvq->vring, cvq->head, write); 2068 vringh_kiov_cleanup(&cvq->riov); 2069 vringh_kiov_cleanup(&cvq->wiov); 2070 2071 if (vringh_need_notify_iotlb(&cvq->vring)) 2072 vringh_notify(&cvq->vring); 2073 2074 
cvq->completed_desc++; 2075 queue_work(mvdev->wq, &wqent->work); 2076 break; 2077 } 2078 2079 out: 2080 up_write(&ndev->reslock); 2081 } 2082 2083 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) 2084 { 2085 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2086 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2087 struct mlx5_vdpa_virtqueue *mvq; 2088 2089 if (!is_index_valid(mvdev, idx)) 2090 return; 2091 2092 if (unlikely(is_ctrl_vq_idx(mvdev, idx))) { 2093 if (!mvdev->wq || !mvdev->cvq.ready) 2094 return; 2095 2096 queue_work(mvdev->wq, &ndev->cvq_ent.work); 2097 return; 2098 } 2099 2100 mvq = &ndev->vqs[idx]; 2101 if (unlikely(!mvq->ready)) 2102 return; 2103 2104 iowrite16(idx, ndev->mvdev.res.kick_addr); 2105 } 2106 2107 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area, 2108 u64 driver_area, u64 device_area) 2109 { 2110 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2111 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2112 struct mlx5_vdpa_virtqueue *mvq; 2113 2114 if (!is_index_valid(mvdev, idx)) 2115 return -EINVAL; 2116 2117 if (is_ctrl_vq_idx(mvdev, idx)) { 2118 mvdev->cvq.desc_addr = desc_area; 2119 mvdev->cvq.device_addr = device_area; 2120 mvdev->cvq.driver_addr = driver_area; 2121 return 0; 2122 } 2123 2124 mvq = &ndev->vqs[idx]; 2125 mvq->desc_addr = desc_area; 2126 mvq->device_addr = device_area; 2127 mvq->driver_addr = driver_area; 2128 return 0; 2129 } 2130 2131 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) 2132 { 2133 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2134 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2135 struct mlx5_vdpa_virtqueue *mvq; 2136 2137 if (!is_index_valid(mvdev, idx)) 2138 return; 2139 2140 if (is_ctrl_vq_idx(mvdev, idx)) { 2141 struct mlx5_control_vq *cvq = &mvdev->cvq; 2142 2143 cvq->vring.vring.num = num; 2144 return; 2145 } 2146 2147 mvq = &ndev->vqs[idx]; 2148 mvq->num_ent = num; 2149 } 2150 2151 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb) 2152 { 2153 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2154 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2155 2156 ndev->event_cbs[idx] = *cb; 2157 if (is_ctrl_vq_idx(mvdev, idx)) 2158 mvdev->cvq.event_cb = *cb; 2159 } 2160 2161 static void mlx5_cvq_notify(struct vringh *vring) 2162 { 2163 struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring); 2164 2165 if (!cvq->event_cb.callback) 2166 return; 2167 2168 cvq->event_cb.callback(cvq->event_cb.private); 2169 } 2170 2171 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready) 2172 { 2173 struct mlx5_control_vq *cvq = &mvdev->cvq; 2174 2175 cvq->ready = ready; 2176 if (!ready) 2177 return; 2178 2179 cvq->vring.notify = mlx5_cvq_notify; 2180 } 2181 2182 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready) 2183 { 2184 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2185 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2186 struct mlx5_vdpa_virtqueue *mvq; 2187 int err; 2188 2189 if (!mvdev->actual_features) 2190 return; 2191 2192 if (!is_index_valid(mvdev, idx)) 2193 return; 2194 2195 if (is_ctrl_vq_idx(mvdev, idx)) { 2196 set_cvq_ready(mvdev, ready); 2197 return; 2198 } 2199 2200 mvq = &ndev->vqs[idx]; 2201 if (!ready) { 2202 suspend_vq(ndev, mvq); 2203 } else { 2204 err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); 2205 if (err) { 2206 mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err); 2207 
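/* FW refused to transition the VQ to the ready state; keep it marked as not ready so the failure is visible through mlx5_vdpa_get_vq_ready(). */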
ready = false; 2208 } 2209 } 2210 2211 2212 mvq->ready = ready; 2213 } 2214 2215 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx) 2216 { 2217 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2218 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2219 2220 if (!is_index_valid(mvdev, idx)) 2221 return false; 2222 2223 if (is_ctrl_vq_idx(mvdev, idx)) 2224 return mvdev->cvq.ready; 2225 2226 return ndev->vqs[idx].ready; 2227 } 2228 2229 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, 2230 const struct vdpa_vq_state *state) 2231 { 2232 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2233 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2234 struct mlx5_vdpa_virtqueue *mvq; 2235 2236 if (!is_index_valid(mvdev, idx)) 2237 return -EINVAL; 2238 2239 if (is_ctrl_vq_idx(mvdev, idx)) { 2240 mvdev->cvq.vring.last_avail_idx = state->split.avail_index; 2241 return 0; 2242 } 2243 2244 mvq = &ndev->vqs[idx]; 2245 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) { 2246 mlx5_vdpa_warn(mvdev, "can't modify available index\n"); 2247 return -EINVAL; 2248 } 2249 2250 mvq->used_idx = state->split.avail_index; 2251 mvq->avail_idx = state->split.avail_index; 2252 return 0; 2253 } 2254 2255 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state) 2256 { 2257 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2258 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2259 struct mlx5_vdpa_virtqueue *mvq; 2260 struct mlx5_virtq_attr attr; 2261 int err; 2262 2263 if (!is_index_valid(mvdev, idx)) 2264 return -EINVAL; 2265 2266 if (is_ctrl_vq_idx(mvdev, idx)) { 2267 state->split.avail_index = mvdev->cvq.vring.last_avail_idx; 2268 return 0; 2269 } 2270 2271 mvq = &ndev->vqs[idx]; 2272 /* If the virtq object was destroyed, use the value saved at 2273 * the last minute of suspend_vq. This caters for userspace 2274 * that cares about emulating the index after vq is stopped. 2275 */ 2276 if (!mvq->initialized) { 2277 /* Firmware returns a wrong value for the available index. 2278 * Since both values should be identical, we take the value of 2279 * used_idx which is reported correctly. 
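* (mlx5_vdpa_set_vq_state() above writes the same value to both used_idx and avail_idx, so reporting used_idx here keeps a save/restore round trip consistent.)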
2280 */ 2281 state->split.avail_index = mvq->used_idx; 2282 return 0; 2283 } 2284 2285 err = query_virtqueue(ndev, mvq, &attr); 2286 if (err) { 2287 mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n"); 2288 return err; 2289 } 2290 state->split.avail_index = attr.used_index; 2291 return 0; 2292 } 2293 2294 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev) 2295 { 2296 return PAGE_SIZE; 2297 } 2298 2299 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx) 2300 { 2301 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2302 2303 if (is_ctrl_vq_idx(mvdev, idx)) 2304 return MLX5_VDPA_CVQ_GROUP; 2305 2306 return MLX5_VDPA_DATAVQ_GROUP; 2307 } 2308 2309 static u64 mlx_to_vritio_features(u16 dev_features) 2310 { 2311 u64 result = 0; 2312 2313 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF)) 2314 result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 2315 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN)) 2316 result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN); 2317 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN)) 2318 result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN); 2319 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6)) 2320 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6); 2321 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4)) 2322 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4); 2323 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM)) 2324 result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM); 2325 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM)) 2326 result |= BIT_ULL(VIRTIO_NET_F_CSUM); 2327 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6)) 2328 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6); 2329 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4)) 2330 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4); 2331 2332 return result; 2333 } 2334 2335 static u64 get_supported_features(struct mlx5_core_dev *mdev) 2336 { 2337 u64 mlx_vdpa_features = 0; 2338 u16 dev_features; 2339 2340 dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask); 2341 mlx_vdpa_features |= mlx_to_vritio_features(dev_features); 2342 if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0)) 2343 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1); 2344 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM); 2345 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ); 2346 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); 2347 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ); 2348 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS); 2349 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU); 2350 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN); 2351 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC); 2352 2353 return mlx_vdpa_features; 2354 } 2355 2356 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev) 2357 { 2358 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2359 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2360 2361 print_features(mvdev, ndev->mvdev.mlx_features, false); 2362 return ndev->mvdev.mlx_features; 2363 } 2364 2365 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) 2366 { 2367 /* Minimum features to expect */ 2368 if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) 2369 return -EOPNOTSUPP; 2370 2371 /* Double check features combination sent down by the driver. 2372 * Fail invalid features due to absence of the depended feature. 2373 * 2374 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit 2375 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ". 
2376 * By failing the invalid features sent down by untrusted drivers, 2377 * we're assured the assumption made upon is_index_valid() and 2378 * is_ctrl_vq_idx() will not be compromised. 2379 */ 2380 if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) == 2381 BIT_ULL(VIRTIO_NET_F_MQ)) 2382 return -EINVAL; 2383 2384 return 0; 2385 } 2386 2387 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) 2388 { 2389 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2390 int err; 2391 int i; 2392 2393 for (i = 0; i < mvdev->max_vqs; i++) { 2394 err = setup_vq(ndev, &ndev->vqs[i]); 2395 if (err) 2396 goto err_vq; 2397 } 2398 2399 return 0; 2400 2401 err_vq: 2402 for (--i; i >= 0; i--) 2403 teardown_vq(ndev, &ndev->vqs[i]); 2404 2405 return err; 2406 } 2407 2408 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev) 2409 { 2410 struct mlx5_vdpa_virtqueue *mvq; 2411 int i; 2412 2413 for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) { 2414 mvq = &ndev->vqs[i]; 2415 if (!mvq->initialized) 2416 continue; 2417 2418 teardown_vq(ndev, mvq); 2419 } 2420 } 2421 2422 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev) 2423 { 2424 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) { 2425 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) { 2426 /* MQ supported. CVQ index is right above the last data virtqueue's */ 2427 mvdev->max_idx = mvdev->max_vqs; 2428 } else { 2429 /* Only CVQ supportted. data virtqueues occupy indices 0 and 1. 2430 * CVQ gets index 2 2431 */ 2432 mvdev->max_idx = 2; 2433 } 2434 } else { 2435 /* Two data virtqueues only: one for rx and one for tx */ 2436 mvdev->max_idx = 1; 2437 } 2438 } 2439 2440 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) 2441 { 2442 u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {}; 2443 u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {}; 2444 int err; 2445 2446 MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); 2447 MLX5_SET(query_vport_state_in, in, op_mod, opmod); 2448 MLX5_SET(query_vport_state_in, in, vport_number, vport); 2449 if (vport) 2450 MLX5_SET(query_vport_state_in, in, other_vport, 1); 2451 2452 err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out); 2453 if (err) 2454 return 0; 2455 2456 return MLX5_GET(query_vport_state_out, out, state); 2457 } 2458 2459 static bool get_link_state(struct mlx5_vdpa_dev *mvdev) 2460 { 2461 if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) == 2462 VPORT_STATE_UP) 2463 return true; 2464 2465 return false; 2466 } 2467 2468 static void update_carrier(struct work_struct *work) 2469 { 2470 struct mlx5_vdpa_wq_ent *wqent; 2471 struct mlx5_vdpa_dev *mvdev; 2472 struct mlx5_vdpa_net *ndev; 2473 2474 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 2475 mvdev = wqent->mvdev; 2476 ndev = to_mlx5_vdpa_ndev(mvdev); 2477 if (get_link_state(mvdev)) 2478 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); 2479 else 2480 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); 2481 2482 if (ndev->config_cb.callback) 2483 ndev->config_cb.callback(ndev->config_cb.private); 2484 2485 kfree(wqent); 2486 } 2487 2488 static int queue_link_work(struct mlx5_vdpa_net *ndev) 2489 { 2490 struct mlx5_vdpa_wq_ent *wqent; 2491 2492 wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); 2493 if (!wqent) 2494 return -ENOMEM; 2495 2496 wqent->mvdev = &ndev->mvdev; 2497 INIT_WORK(&wqent->work, update_carrier); 2498 queue_work(ndev->mvdev.wq, &wqent->work); 2499 return 0; 2500 } 2501 2502 static int 
event_handler(struct notifier_block *nb, unsigned long event, void *param) 2503 { 2504 struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb); 2505 struct mlx5_eqe *eqe = param; 2506 int ret = NOTIFY_DONE; 2507 2508 if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { 2509 switch (eqe->sub_type) { 2510 case MLX5_PORT_CHANGE_SUBTYPE_DOWN: 2511 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: 2512 if (queue_link_work(ndev)) 2513 return NOTIFY_DONE; 2514 2515 ret = NOTIFY_OK; 2516 break; 2517 default: 2518 return NOTIFY_DONE; 2519 } 2520 return ret; 2521 } 2522 return ret; 2523 } 2524 2525 static void register_link_notifier(struct mlx5_vdpa_net *ndev) 2526 { 2527 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS))) 2528 return; 2529 2530 ndev->nb.notifier_call = event_handler; 2531 mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb); 2532 ndev->nb_registered = true; 2533 queue_link_work(ndev); 2534 } 2535 2536 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev) 2537 { 2538 if (!ndev->nb_registered) 2539 return; 2540 2541 ndev->nb_registered = false; 2542 mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb); 2543 if (ndev->mvdev.wq) 2544 flush_workqueue(ndev->mvdev.wq); 2545 } 2546 2547 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) 2548 { 2549 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2550 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2551 int err; 2552 2553 print_features(mvdev, features, true); 2554 2555 err = verify_driver_features(mvdev, features); 2556 if (err) 2557 return err; 2558 2559 ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; 2560 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)) 2561 ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs); 2562 else 2563 ndev->rqt_size = 1; 2564 2565 /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section 2566 * 5.1.6.5.5 "Device operation in multiqueue mode": 2567 * 2568 * Multiqueue is disabled by default. 2569 * The driver enables multiqueue by sending a command using class 2570 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue 2571 * operation, as follows: ... 
2572 */ 2573 ndev->cur_num_vqs = 2; 2574 2575 update_cvq_info(mvdev); 2576 return err; 2577 } 2578 2579 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb) 2580 { 2581 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2582 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2583 2584 ndev->config_cb = *cb; 2585 } 2586 2587 #define MLX5_VDPA_MAX_VQ_ENTRIES 256 2588 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev) 2589 { 2590 return MLX5_VDPA_MAX_VQ_ENTRIES; 2591 } 2592 2593 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev) 2594 { 2595 return VIRTIO_ID_NET; 2596 } 2597 2598 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev) 2599 { 2600 return PCI_VENDOR_ID_MELLANOX; 2601 } 2602 2603 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev) 2604 { 2605 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2606 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2607 2608 print_status(mvdev, ndev->mvdev.status, false); 2609 return ndev->mvdev.status; 2610 } 2611 2612 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 2613 { 2614 struct mlx5_vq_restore_info *ri = &mvq->ri; 2615 struct mlx5_virtq_attr attr = {}; 2616 int err; 2617 2618 if (mvq->initialized) { 2619 err = query_virtqueue(ndev, mvq, &attr); 2620 if (err) 2621 return err; 2622 } 2623 2624 ri->avail_index = attr.available_index; 2625 ri->used_index = attr.used_index; 2626 ri->ready = mvq->ready; 2627 ri->num_ent = mvq->num_ent; 2628 ri->desc_addr = mvq->desc_addr; 2629 ri->device_addr = mvq->device_addr; 2630 ri->driver_addr = mvq->driver_addr; 2631 ri->map = mvq->map; 2632 ri->restore = true; 2633 return 0; 2634 } 2635 2636 static int save_channels_info(struct mlx5_vdpa_net *ndev) 2637 { 2638 int i; 2639 2640 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2641 memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri)); 2642 save_channel_info(ndev, &ndev->vqs[i]); 2643 } 2644 return 0; 2645 } 2646 2647 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev) 2648 { 2649 int i; 2650 2651 for (i = 0; i < ndev->mvdev.max_vqs; i++) 2652 memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 2653 } 2654 2655 static void restore_channels_info(struct mlx5_vdpa_net *ndev) 2656 { 2657 struct mlx5_vdpa_virtqueue *mvq; 2658 struct mlx5_vq_restore_info *ri; 2659 int i; 2660 2661 mlx5_clear_vqs(ndev); 2662 init_mvqs(ndev); 2663 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2664 mvq = &ndev->vqs[i]; 2665 ri = &mvq->ri; 2666 if (!ri->restore) 2667 continue; 2668 2669 mvq->avail_idx = ri->avail_index; 2670 mvq->used_idx = ri->used_index; 2671 mvq->ready = ri->ready; 2672 mvq->num_ent = ri->num_ent; 2673 mvq->desc_addr = ri->desc_addr; 2674 mvq->device_addr = ri->device_addr; 2675 mvq->driver_addr = ri->driver_addr; 2676 mvq->map = ri->map; 2677 } 2678 } 2679 2680 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, 2681 struct vhost_iotlb *iotlb, unsigned int asid) 2682 { 2683 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2684 int err; 2685 2686 suspend_vqs(ndev); 2687 err = save_channels_info(ndev); 2688 if (err) 2689 goto err_mr; 2690 2691 teardown_driver(ndev); 2692 mlx5_vdpa_destroy_mr_asid(mvdev, asid); 2693 err = mlx5_vdpa_create_mr(mvdev, iotlb, asid); 2694 if (err) 2695 goto err_mr; 2696 2697 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended) 2698 goto err_mr; 2699 2700 restore_channels_info(ndev); 2701 err = setup_driver(mvdev); 2702 if (err) 2703 goto err_setup; 2704 2705 return 0; 2706 2707 err_setup: 
2708 mlx5_vdpa_destroy_mr_asid(mvdev, asid); 2709 err_mr: 2710 return err; 2711 } 2712 2713 /* reslock must be held for this function */ 2714 static int setup_driver(struct mlx5_vdpa_dev *mvdev) 2715 { 2716 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2717 int err; 2718 2719 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 2720 2721 if (ndev->setup) { 2722 mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n"); 2723 err = 0; 2724 goto out; 2725 } 2726 mlx5_vdpa_add_debugfs(ndev); 2727 2728 err = read_umem_params(ndev); 2729 if (err) 2730 goto err_setup; 2731 2732 err = setup_virtqueues(mvdev); 2733 if (err) { 2734 mlx5_vdpa_warn(mvdev, "setup_virtqueues\n"); 2735 goto err_setup; 2736 } 2737 2738 err = create_rqt(ndev); 2739 if (err) { 2740 mlx5_vdpa_warn(mvdev, "create_rqt\n"); 2741 goto err_rqt; 2742 } 2743 2744 err = create_tir(ndev); 2745 if (err) { 2746 mlx5_vdpa_warn(mvdev, "create_tir\n"); 2747 goto err_tir; 2748 } 2749 2750 err = setup_steering(ndev); 2751 if (err) { 2752 mlx5_vdpa_warn(mvdev, "setup_steering\n"); 2753 goto err_fwd; 2754 } 2755 ndev->setup = true; 2756 2757 return 0; 2758 2759 err_fwd: 2760 destroy_tir(ndev); 2761 err_tir: 2762 destroy_rqt(ndev); 2763 err_rqt: 2764 teardown_virtqueues(ndev); 2765 err_setup: 2766 mlx5_vdpa_remove_debugfs(ndev); 2767 out: 2768 return err; 2769 } 2770 2771 /* reslock must be held for this function */ 2772 static void teardown_driver(struct mlx5_vdpa_net *ndev) 2773 { 2774 2775 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 2776 2777 if (!ndev->setup) 2778 return; 2779 2780 mlx5_vdpa_remove_debugfs(ndev); 2781 teardown_steering(ndev); 2782 destroy_tir(ndev); 2783 destroy_rqt(ndev); 2784 teardown_virtqueues(ndev); 2785 ndev->setup = false; 2786 } 2787 2788 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) 2789 { 2790 int i; 2791 2792 for (i = 0; i < ndev->mvdev.max_vqs; i++) 2793 ndev->vqs[i].ready = false; 2794 2795 ndev->mvdev.cvq.ready = false; 2796 } 2797 2798 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) 2799 { 2800 struct mlx5_control_vq *cvq = &mvdev->cvq; 2801 int err = 0; 2802 2803 if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { 2804 u16 idx = cvq->vring.last_avail_idx; 2805 2806 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, 2807 cvq->vring.vring.num, false, 2808 (struct vring_desc *)(uintptr_t)cvq->desc_addr, 2809 (struct vring_avail *)(uintptr_t)cvq->driver_addr, 2810 (struct vring_used *)(uintptr_t)cvq->device_addr); 2811 2812 if (!err) 2813 cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx; 2814 } 2815 return err; 2816 } 2817 2818 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) 2819 { 2820 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2821 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2822 int err; 2823 2824 print_status(mvdev, status, true); 2825 2826 down_write(&ndev->reslock); 2827 2828 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { 2829 if (status & VIRTIO_CONFIG_S_DRIVER_OK) { 2830 err = setup_cvq_vring(mvdev); 2831 if (err) { 2832 mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n"); 2833 goto err_setup; 2834 } 2835 register_link_notifier(ndev); 2836 err = setup_driver(mvdev); 2837 if (err) { 2838 mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); 2839 goto err_driver; 2840 } 2841 } else { 2842 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); 2843 goto err_clear; 2844 } 2845 } 2846 2847 ndev->mvdev.status = status; 2848 up_write(&ndev->reslock); 2849 return; 2850 2851 
err_driver: 2852 unregister_link_notifier(ndev); 2853 err_setup: 2854 mlx5_vdpa_destroy_mr(&ndev->mvdev); 2855 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; 2856 err_clear: 2857 up_write(&ndev->reslock); 2858 } 2859 2860 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev) 2861 { 2862 int i; 2863 2864 /* default mapping all groups are mapped to asid 0 */ 2865 for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++) 2866 mvdev->group2asid[i] = 0; 2867 } 2868 2869 static int mlx5_vdpa_reset(struct vdpa_device *vdev) 2870 { 2871 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2872 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2873 2874 print_status(mvdev, 0, true); 2875 mlx5_vdpa_info(mvdev, "performing device reset\n"); 2876 2877 down_write(&ndev->reslock); 2878 unregister_link_notifier(ndev); 2879 teardown_driver(ndev); 2880 clear_vqs_ready(ndev); 2881 mlx5_vdpa_destroy_mr(&ndev->mvdev); 2882 ndev->mvdev.status = 0; 2883 ndev->mvdev.suspended = false; 2884 ndev->cur_num_vqs = 0; 2885 ndev->mvdev.cvq.received_desc = 0; 2886 ndev->mvdev.cvq.completed_desc = 0; 2887 memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1)); 2888 ndev->mvdev.actual_features = 0; 2889 init_group_to_asid_map(mvdev); 2890 ++mvdev->generation; 2891 2892 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 2893 if (mlx5_vdpa_create_mr(mvdev, NULL, 0)) 2894 mlx5_vdpa_warn(mvdev, "create MR failed\n"); 2895 } 2896 up_write(&ndev->reslock); 2897 2898 return 0; 2899 } 2900 2901 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev) 2902 { 2903 return sizeof(struct virtio_net_config); 2904 } 2905 2906 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, 2907 unsigned int len) 2908 { 2909 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2910 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2911 2912 if (offset + len <= sizeof(struct virtio_net_config)) 2913 memcpy(buf, (u8 *)&ndev->config + offset, len); 2914 } 2915 2916 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, 2917 unsigned int len) 2918 { 2919 /* not supported */ 2920 } 2921 2922 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) 2923 { 2924 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2925 2926 return mvdev->generation; 2927 } 2928 2929 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, 2930 unsigned int asid) 2931 { 2932 bool change_map; 2933 int err; 2934 2935 err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid); 2936 if (err) { 2937 mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err); 2938 return err; 2939 } 2940 2941 if (change_map) 2942 err = mlx5_vdpa_change_map(mvdev, iotlb, asid); 2943 2944 return err; 2945 } 2946 2947 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, 2948 struct vhost_iotlb *iotlb) 2949 { 2950 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2951 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2952 int err = -EINVAL; 2953 2954 down_write(&ndev->reslock); 2955 err = set_map_data(mvdev, iotlb, asid); 2956 up_write(&ndev->reslock); 2957 return err; 2958 } 2959 2960 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx) 2961 { 2962 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2963 2964 if (is_ctrl_vq_idx(mvdev, idx)) 2965 return &vdev->dev; 2966 2967 return mvdev->vdev.dma_dev; 2968 } 2969 2970 static void free_irqs(struct mlx5_vdpa_net *ndev) 2971 { 2972 struct mlx5_vdpa_irq_pool_entry *ent; 2973 int i; 2974 2975 if 
(!msix_mode_supported(&ndev->mvdev)) 2976 return; 2977 2978 if (!ndev->irqp.entries) 2979 return; 2980 2981 for (i = ndev->irqp.num_ent - 1; i >= 0; i--) { 2982 ent = ndev->irqp.entries + i; 2983 if (ent->map.virq) 2984 pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map); 2985 } 2986 kfree(ndev->irqp.entries); 2987 } 2988 2989 static void mlx5_vdpa_free(struct vdpa_device *vdev) 2990 { 2991 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2992 struct mlx5_core_dev *pfmdev; 2993 struct mlx5_vdpa_net *ndev; 2994 2995 ndev = to_mlx5_vdpa_ndev(mvdev); 2996 2997 free_resources(ndev); 2998 mlx5_vdpa_destroy_mr(mvdev); 2999 if (!is_zero_ether_addr(ndev->config.mac)) { 3000 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); 3001 mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); 3002 } 3003 mlx5_vdpa_free_resources(&ndev->mvdev); 3004 free_irqs(ndev); 3005 kfree(ndev->event_cbs); 3006 kfree(ndev->vqs); 3007 } 3008 3009 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx) 3010 { 3011 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3012 struct vdpa_notification_area ret = {}; 3013 struct mlx5_vdpa_net *ndev; 3014 phys_addr_t addr; 3015 3016 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) 3017 return ret; 3018 3019 /* If SF BAR size is smaller than PAGE_SIZE, do not use direct 3020 * notification to avoid the risk of mapping pages that contain BAR of more 3021 * than one SF 3022 */ 3023 if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT) 3024 return ret; 3025 3026 ndev = to_mlx5_vdpa_ndev(mvdev); 3027 addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr; 3028 ret.addr = addr; 3029 ret.size = PAGE_SIZE; 3030 return ret; 3031 } 3032 3033 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx) 3034 { 3035 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3036 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3037 struct mlx5_vdpa_virtqueue *mvq; 3038 3039 if (!is_index_valid(mvdev, idx)) 3040 return -EINVAL; 3041 3042 if (is_ctrl_vq_idx(mvdev, idx)) 3043 return -EOPNOTSUPP; 3044 3045 mvq = &ndev->vqs[idx]; 3046 if (!mvq->map.virq) 3047 return -EOPNOTSUPP; 3048 3049 return mvq->map.virq; 3050 } 3051 3052 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev) 3053 { 3054 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3055 3056 return mvdev->actual_features; 3057 } 3058 3059 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 3060 u64 *received_desc, u64 *completed_desc) 3061 { 3062 u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {}; 3063 u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {}; 3064 void *cmd_hdr; 3065 void *ctx; 3066 int err; 3067 3068 if (!counters_supported(&ndev->mvdev)) 3069 return -EOPNOTSUPP; 3070 3071 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 3072 return -EAGAIN; 3073 3074 cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr); 3075 3076 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 3077 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 3078 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 3079 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id); 3080 3081 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 3082 if (err) 3083 return err; 3084 3085 ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters); 3086 *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc); 3087 *completed_desc = 
MLX5_GET64(virtio_q_counters, ctx, completed_desc); 3088 return 0; 3089 } 3090 3091 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx, 3092 struct sk_buff *msg, 3093 struct netlink_ext_ack *extack) 3094 { 3095 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3096 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3097 struct mlx5_vdpa_virtqueue *mvq; 3098 struct mlx5_control_vq *cvq; 3099 u64 received_desc; 3100 u64 completed_desc; 3101 int err = 0; 3102 3103 down_read(&ndev->reslock); 3104 if (!is_index_valid(mvdev, idx)) { 3105 NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid"); 3106 err = -EINVAL; 3107 goto out_err; 3108 } 3109 3110 if (idx == ctrl_vq_idx(mvdev)) { 3111 cvq = &mvdev->cvq; 3112 received_desc = cvq->received_desc; 3113 completed_desc = cvq->completed_desc; 3114 goto out; 3115 } 3116 3117 mvq = &ndev->vqs[idx]; 3118 err = counter_set_query(ndev, mvq, &received_desc, &completed_desc); 3119 if (err) { 3120 NL_SET_ERR_MSG_MOD(extack, "failed to query hardware"); 3121 goto out_err; 3122 } 3123 3124 out: 3125 err = -EMSGSIZE; 3126 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc")) 3127 goto out_err; 3128 3129 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc, 3130 VDPA_ATTR_PAD)) 3131 goto out_err; 3132 3133 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc")) 3134 goto out_err; 3135 3136 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc, 3137 VDPA_ATTR_PAD)) 3138 goto out_err; 3139 3140 err = 0; 3141 out_err: 3142 up_read(&ndev->reslock); 3143 return err; 3144 } 3145 3146 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev) 3147 { 3148 struct mlx5_control_vq *cvq; 3149 3150 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 3151 return; 3152 3153 cvq = &mvdev->cvq; 3154 cvq->ready = false; 3155 } 3156 3157 static int mlx5_vdpa_suspend(struct vdpa_device *vdev) 3158 { 3159 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3160 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3161 struct mlx5_vdpa_virtqueue *mvq; 3162 int i; 3163 3164 mlx5_vdpa_info(mvdev, "suspending device\n"); 3165 3166 down_write(&ndev->reslock); 3167 unregister_link_notifier(ndev); 3168 for (i = 0; i < ndev->cur_num_vqs; i++) { 3169 mvq = &ndev->vqs[i]; 3170 suspend_vq(ndev, mvq); 3171 } 3172 mlx5_vdpa_cvq_suspend(mvdev); 3173 mvdev->suspended = true; 3174 up_write(&ndev->reslock); 3175 return 0; 3176 } 3177 3178 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, 3179 unsigned int asid) 3180 { 3181 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3182 3183 if (group >= MLX5_VDPA_NUMVQ_GROUPS) 3184 return -EINVAL; 3185 3186 mvdev->group2asid[group] = asid; 3187 return 0; 3188 } 3189 3190 static const struct vdpa_config_ops mlx5_vdpa_ops = { 3191 .set_vq_address = mlx5_vdpa_set_vq_address, 3192 .set_vq_num = mlx5_vdpa_set_vq_num, 3193 .kick_vq = mlx5_vdpa_kick_vq, 3194 .set_vq_cb = mlx5_vdpa_set_vq_cb, 3195 .set_vq_ready = mlx5_vdpa_set_vq_ready, 3196 .get_vq_ready = mlx5_vdpa_get_vq_ready, 3197 .set_vq_state = mlx5_vdpa_set_vq_state, 3198 .get_vq_state = mlx5_vdpa_get_vq_state, 3199 .get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats, 3200 .get_vq_notification = mlx5_get_vq_notification, 3201 .get_vq_irq = mlx5_get_vq_irq, 3202 .get_vq_align = mlx5_vdpa_get_vq_align, 3203 .get_vq_group = mlx5_vdpa_get_vq_group, 3204 .get_device_features = mlx5_vdpa_get_device_features, 3205 .set_driver_features = mlx5_vdpa_set_driver_features, 3206 
.get_driver_features = mlx5_vdpa_get_driver_features, 3207 .set_config_cb = mlx5_vdpa_set_config_cb, 3208 .get_vq_num_max = mlx5_vdpa_get_vq_num_max, 3209 .get_device_id = mlx5_vdpa_get_device_id, 3210 .get_vendor_id = mlx5_vdpa_get_vendor_id, 3211 .get_status = mlx5_vdpa_get_status, 3212 .set_status = mlx5_vdpa_set_status, 3213 .reset = mlx5_vdpa_reset, 3214 .get_config_size = mlx5_vdpa_get_config_size, 3215 .get_config = mlx5_vdpa_get_config, 3216 .set_config = mlx5_vdpa_set_config, 3217 .get_generation = mlx5_vdpa_get_generation, 3218 .set_map = mlx5_vdpa_set_map, 3219 .set_group_asid = mlx5_set_group_asid, 3220 .get_vq_dma_dev = mlx5_get_vq_dma_dev, 3221 .free = mlx5_vdpa_free, 3222 .suspend = mlx5_vdpa_suspend, 3223 }; 3224 3225 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) 3226 { 3227 u16 hw_mtu; 3228 int err; 3229 3230 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); 3231 if (err) 3232 return err; 3233 3234 *mtu = hw_mtu - MLX5V_ETH_HARD_MTU; 3235 return 0; 3236 } 3237 3238 static int alloc_resources(struct mlx5_vdpa_net *ndev) 3239 { 3240 struct mlx5_vdpa_net_resources *res = &ndev->res; 3241 int err; 3242 3243 if (res->valid) { 3244 mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n"); 3245 return -EEXIST; 3246 } 3247 3248 err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn); 3249 if (err) 3250 return err; 3251 3252 err = create_tis(ndev); 3253 if (err) 3254 goto err_tis; 3255 3256 res->valid = true; 3257 3258 return 0; 3259 3260 err_tis: 3261 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 3262 return err; 3263 } 3264 3265 static void free_resources(struct mlx5_vdpa_net *ndev) 3266 { 3267 struct mlx5_vdpa_net_resources *res = &ndev->res; 3268 3269 if (!res->valid) 3270 return; 3271 3272 destroy_tis(ndev); 3273 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 3274 res->valid = false; 3275 } 3276 3277 static void init_mvqs(struct mlx5_vdpa_net *ndev) 3278 { 3279 struct mlx5_vdpa_virtqueue *mvq; 3280 int i; 3281 3282 for (i = 0; i < ndev->mvdev.max_vqs; ++i) { 3283 mvq = &ndev->vqs[i]; 3284 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 3285 mvq->index = i; 3286 mvq->ndev = ndev; 3287 mvq->fwqp.fw = true; 3288 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; 3289 } 3290 for (; i < ndev->mvdev.max_vqs; i++) { 3291 mvq = &ndev->vqs[i]; 3292 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 3293 mvq->index = i; 3294 mvq->ndev = ndev; 3295 } 3296 } 3297 3298 struct mlx5_vdpa_mgmtdev { 3299 struct vdpa_mgmt_dev mgtdev; 3300 struct mlx5_adev *madev; 3301 struct mlx5_vdpa_net *ndev; 3302 }; 3303 3304 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu) 3305 { 3306 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); 3307 void *in; 3308 int err; 3309 3310 in = kvzalloc(inlen, GFP_KERNEL); 3311 if (!in) 3312 return -ENOMEM; 3313 3314 MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1); 3315 MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, 3316 mtu + MLX5V_ETH_HARD_MTU); 3317 MLX5_SET(modify_nic_vport_context_in, in, opcode, 3318 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); 3319 3320 err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); 3321 3322 kvfree(in); 3323 return err; 3324 } 3325 3326 static void allocate_irqs(struct mlx5_vdpa_net *ndev) 3327 { 3328 struct mlx5_vdpa_irq_pool_entry *ent; 3329 int i; 3330 3331 if (!msix_mode_supported(&ndev->mvdev)) 3332 return; 3333 3334 if (!ndev->mvdev.mdev->pdev) 3335 return; 3336 3337 ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, 
sizeof(*ndev->irqp.entries), GFP_KERNEL); 3338 if (!ndev->irqp.entries) 3339 return; 3340 3341 3342 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 3343 ent = ndev->irqp.entries + i; 3344 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", 3345 dev_name(&ndev->mvdev.vdev.dev), i); 3346 ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL); 3347 if (!ent->map.virq) 3348 return; 3349 3350 ndev->irqp.num_ent++; 3351 } 3352 } 3353 3354 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, 3355 const struct vdpa_dev_set_config *add_config) 3356 { 3357 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); 3358 struct virtio_net_config *config; 3359 struct mlx5_core_dev *pfmdev; 3360 struct mlx5_vdpa_dev *mvdev; 3361 struct mlx5_vdpa_net *ndev; 3362 struct mlx5_core_dev *mdev; 3363 u64 device_features; 3364 u32 max_vqs; 3365 u16 mtu; 3366 int err; 3367 3368 if (mgtdev->ndev) 3369 return -ENOSPC; 3370 3371 mdev = mgtdev->madev->mdev; 3372 device_features = mgtdev->mgtdev.supported_features; 3373 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { 3374 if (add_config->device_features & ~device_features) { 3375 dev_warn(mdev->device, 3376 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n", 3377 add_config->device_features, device_features); 3378 return -EINVAL; 3379 } 3380 device_features &= add_config->device_features; 3381 } else { 3382 device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 3383 } 3384 if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) && 3385 device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) { 3386 dev_warn(mdev->device, 3387 "Must provision minimum features 0x%llx for this device", 3388 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)); 3389 return -EOPNOTSUPP; 3390 } 3391 3392 if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) & 3393 MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) { 3394 dev_warn(mdev->device, "missing support for split virtqueues\n"); 3395 return -EOPNOTSUPP; 3396 } 3397 3398 max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues), 3399 1 << MLX5_CAP_GEN(mdev, log_max_rqt_size)); 3400 if (max_vqs < 2) { 3401 dev_warn(mdev->device, 3402 "%d virtqueues are supported. 
At least 2 are required\n", 3403 max_vqs); 3404 return -EAGAIN; 3405 } 3406 3407 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) { 3408 if (add_config->net.max_vq_pairs > max_vqs / 2) 3409 return -EINVAL; 3410 max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs); 3411 } else { 3412 max_vqs = 2; 3413 } 3414 3415 ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, 3416 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false); 3417 if (IS_ERR(ndev)) 3418 return PTR_ERR(ndev); 3419 3420 ndev->mvdev.max_vqs = max_vqs; 3421 mvdev = &ndev->mvdev; 3422 mvdev->mdev = mdev; 3423 3424 ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL); 3425 ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL); 3426 if (!ndev->vqs || !ndev->event_cbs) { 3427 err = -ENOMEM; 3428 goto err_alloc; 3429 } 3430 3431 init_mvqs(ndev); 3432 allocate_irqs(ndev); 3433 init_rwsem(&ndev->reslock); 3434 config = &ndev->config; 3435 3436 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) { 3437 err = config_func_mtu(mdev, add_config->net.mtu); 3438 if (err) 3439 goto err_alloc; 3440 } 3441 3442 if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) { 3443 err = query_mtu(mdev, &mtu); 3444 if (err) 3445 goto err_alloc; 3446 3447 ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu); 3448 } 3449 3450 if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) { 3451 if (get_link_state(mvdev)) 3452 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); 3453 else 3454 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); 3455 } 3456 3457 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) { 3458 memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN); 3459 /* Don't bother setting a MAC address in the config space if _F_MAC is not going to be provisioned. */ 3460 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 || 3461 device_features & BIT_ULL(VIRTIO_NET_F_MAC)) { 3462 err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); 3463 if (err) 3464 goto err_alloc; 3465 } 3466 3467 if (!is_zero_ether_addr(config->mac)) { 3468 pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); 3469 err = mlx5_mpfs_add_mac(pfmdev, config->mac); 3470 if (err) 3471 goto err_alloc; 3472 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) { 3473 /* 3474 * We used to clear the _F_MAC feature bit if a zero 3475 * MAC address was seen while device features were not 3476 * explicitly provisioned. Keep that behaviour 3477 * so old scripts do not break.
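* For example, a plain "vdpa dev add name vdpa0 mgmtdev <mgmtdev>" (names illustrative) on a function whose vport MAC is all zeros still succeeds; the device is simply created without VIRTIO_NET_F_MAC.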
3478 */ 3479 device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC); 3480 } else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) { 3481 /* Don't provision zero mac address for _F_MAC */ 3482 mlx5_vdpa_warn(&ndev->mvdev, 3483 "No mac address provisioned?\n"); 3484 err = -EINVAL; 3485 goto err_alloc; 3486 } 3487 3488 if (device_features & BIT_ULL(VIRTIO_NET_F_MQ)) 3489 config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2); 3490 3491 ndev->mvdev.mlx_features = device_features; 3492 mvdev->vdev.dma_dev = &mdev->pdev->dev; 3493 err = mlx5_vdpa_alloc_resources(&ndev->mvdev); 3494 if (err) 3495 goto err_mpfs; 3496 3497 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 3498 err = mlx5_vdpa_create_mr(mvdev, NULL, 0); 3499 if (err) 3500 goto err_res; 3501 } 3502 3503 err = alloc_resources(ndev); 3504 if (err) 3505 goto err_mr; 3506 3507 ndev->cvq_ent.mvdev = mvdev; 3508 INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler); 3509 mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq"); 3510 if (!mvdev->wq) { 3511 err = -ENOMEM; 3512 goto err_res2; 3513 } 3514 3515 mvdev->vdev.mdev = &mgtdev->mgtdev; 3516 err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1); 3517 if (err) 3518 goto err_reg; 3519 3520 mgtdev->ndev = ndev; 3521 return 0; 3522 3523 err_reg: 3524 destroy_workqueue(mvdev->wq); 3525 err_res2: 3526 free_resources(ndev); 3527 err_mr: 3528 mlx5_vdpa_destroy_mr(mvdev); 3529 err_res: 3530 mlx5_vdpa_free_resources(&ndev->mvdev); 3531 err_mpfs: 3532 if (!is_zero_ether_addr(config->mac)) 3533 mlx5_mpfs_del_mac(pfmdev, config->mac); 3534 err_alloc: 3535 put_device(&mvdev->vdev.dev); 3536 return err; 3537 } 3538 3539 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev) 3540 { 3541 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); 3542 struct mlx5_vdpa_dev *mvdev = to_mvdev(dev); 3543 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3544 struct workqueue_struct *wq; 3545 3546 unregister_link_notifier(ndev); 3547 _vdpa_unregister_device(dev); 3548 wq = mvdev->wq; 3549 mvdev->wq = NULL; 3550 destroy_workqueue(wq); 3551 mgtdev->ndev = NULL; 3552 } 3553 3554 static const struct vdpa_mgmtdev_ops mdev_ops = { 3555 .dev_add = mlx5_vdpa_dev_add, 3556 .dev_del = mlx5_vdpa_dev_del, 3557 }; 3558 3559 static struct virtio_device_id id_table[] = { 3560 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 3561 { 0 }, 3562 }; 3563 3564 static int mlx5v_probe(struct auxiliary_device *adev, 3565 const struct auxiliary_device_id *id) 3566 3567 { 3568 struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev); 3569 struct mlx5_core_dev *mdev = madev->mdev; 3570 struct mlx5_vdpa_mgmtdev *mgtdev; 3571 int err; 3572 3573 mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL); 3574 if (!mgtdev) 3575 return -ENOMEM; 3576 3577 mgtdev->mgtdev.ops = &mdev_ops; 3578 mgtdev->mgtdev.device = mdev->device; 3579 mgtdev->mgtdev.id_table = id_table; 3580 mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | 3581 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) | 3582 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) | 3583 BIT_ULL(VDPA_ATTR_DEV_FEATURES); 3584 mgtdev->mgtdev.max_supported_vqs = 3585 MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1; 3586 mgtdev->mgtdev.supported_features = get_supported_features(mdev); 3587 mgtdev->madev = madev; 3588 3589 err = vdpa_mgmtdev_register(&mgtdev->mgtdev); 3590 if (err) 3591 goto reg_err; 3592 3593 auxiliary_set_drvdata(adev, mgtdev); 3594 3595 return 0; 3596 3597 reg_err: 3598 kfree(mgtdev); 3599 return err; 3600 
} 3601 3602 static void mlx5v_remove(struct auxiliary_device *adev) 3603 { 3604 struct mlx5_vdpa_mgmtdev *mgtdev; 3605 3606 mgtdev = auxiliary_get_drvdata(adev); 3607 vdpa_mgmtdev_unregister(&mgtdev->mgtdev); 3608 kfree(mgtdev); 3609 } 3610 3611 static const struct auxiliary_device_id mlx5v_id_table[] = { 3612 { .name = MLX5_ADEV_NAME ".vnet", }, 3613 {}, 3614 }; 3615 3616 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table); 3617 3618 static struct auxiliary_driver mlx5v_driver = { 3619 .name = "vnet", 3620 .probe = mlx5v_probe, 3621 .remove = mlx5v_remove, 3622 .id_table = mlx5v_id_table, 3623 }; 3624 3625 module_auxiliary_driver(mlx5v_driver); 3626
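/*
 * A minimal provisioning sketch with the iproute2 "vdpa" tool (the bus
 * address, device name and MAC below are illustrative, not taken from
 * this driver):
 *
 *   vdpa mgmtdev show
 *   vdpa dev add name vdpa0 mgmtdev pci/0000:3b:00.2 mac 00:11:22:33:44:55 max_vqp 4
 *   vdpa dev config show vdpa0
 *
 * The attributes accepted on "dev add" correspond to the config_attr_mask
 * advertised in mlx5v_probe(): MAC address, max VQ pairs, MTU and device
 * features.
 */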