1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */ 3 4 #include <linux/module.h> 5 #include <linux/vdpa.h> 6 #include <linux/vringh.h> 7 #include <uapi/linux/virtio_net.h> 8 #include <uapi/linux/virtio_ids.h> 9 #include <uapi/linux/vdpa.h> 10 #include <linux/virtio_config.h> 11 #include <linux/auxiliary_bus.h> 12 #include <linux/mlx5/cq.h> 13 #include <linux/mlx5/qp.h> 14 #include <linux/mlx5/device.h> 15 #include <linux/mlx5/driver.h> 16 #include <linux/mlx5/vport.h> 17 #include <linux/mlx5/fs.h> 18 #include <linux/mlx5/mlx5_ifc_vdpa.h> 19 #include <linux/mlx5/mpfs.h> 20 #include "mlx5_vdpa.h" 21 #include "mlx5_vnet.h" 22 23 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); 24 MODULE_DESCRIPTION("Mellanox VDPA driver"); 25 MODULE_LICENSE("Dual BSD/GPL"); 26 27 #define VALID_FEATURES_MASK \ 28 (BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \ 29 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \ 30 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \ 31 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \ 32 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \ 33 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \ 34 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \ 35 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \ 36 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \ 37 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \ 38 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \ 39 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \ 40 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV)) 41 42 #define VALID_STATUS_MASK \ 43 (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \ 44 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED) 45 46 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature))) 47 48 #define MLX5V_UNTAGGED 0x1000 49 50 struct mlx5_vdpa_cq_buf { 51 struct mlx5_frag_buf_ctrl fbc; 52 struct mlx5_frag_buf frag_buf; 53 int cqe_size; 54 int nent; 55 }; 56 57 struct mlx5_vdpa_cq { 58 struct mlx5_core_cq mcq; 59 struct mlx5_vdpa_cq_buf buf; 60 struct mlx5_db db; 61 int cqe; 62 }; 63 64 struct mlx5_vdpa_umem { 65 struct mlx5_frag_buf_ctrl fbc; 66 struct mlx5_frag_buf frag_buf; 67 int size; 68 u32 id; 69 }; 70 71 struct mlx5_vdpa_qp { 72 struct mlx5_core_qp mqp; 73 struct mlx5_frag_buf frag_buf; 74 struct mlx5_db db; 75 u16 head; 76 bool fw; 77 }; 78 79 struct mlx5_vq_restore_info { 80 u32 num_ent; 81 u64 desc_addr; 82 u64 device_addr; 83 u64 driver_addr; 84 u16 avail_index; 85 u16 used_index; 86 struct msi_map map; 87 bool ready; 88 bool restore; 89 }; 90 91 struct mlx5_vdpa_virtqueue { 92 bool ready; 93 u64 desc_addr; 94 u64 device_addr; 95 u64 driver_addr; 96 u32 num_ent; 97 98 /* Resources for implementing the notification channel from the device 99 * to the driver. fwqp is the firmware end of an RC connection; the 100 * other end is vqqp used by the driver. cq is where completions are 101 * reported. 
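	 * When the virtqueue has no dedicated MSI-X vector, create_virtqueue()
	 * programs fwqp as the queue's event QP; notifications then arrive as
	 * completions on cq, which mlx5_vdpa_cq_comp() forwards to the vdpa
	 * callback registered for this queue.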
 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal, and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	u32 counter_set_id;
	bool initialized;
	int index;
	u32 virtq_id;
	struct mlx5_vdpa_net *ndev;
	u16 avail_idx;
	u16 used_idx;
	int fw_state;
	struct msi_map map;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};

static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
			return idx < 2;
		else
			return idx < 3;
	}

	return idx <= mvdev->max_idx;
}

static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);

static bool mlx5_vdpa_debug;

#define MLX5_CVQ_MAX_ENT 16

#define MLX5_LOG_VIO_FLAG(_feature)					\
	do {								\
		if (features & BIT_ULL(_feature))			\
			mlx5_vdpa_info(mvdev, "%s\n", #_feature);	\
	} while (0)

#define MLX5_LOG_VIO_STAT(_status)					\
	do {								\
		if (status & (_status))					\
			mlx5_vdpa_info(mvdev, "%s\n", #_status);	\
	} while (0)

/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
	return virtio_legacy_is_little_endian() ||
		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}

static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
		return 2;

	return mvdev->max_vqs;
}

static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	return idx == ctrl_vq_idx(mvdev);
}

static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
	if (status & ~VALID_STATUS_MASK)
		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
			       status & ~VALID_STATUS_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
	if (set && !status) {
		mlx5_vdpa_info(mvdev, "driver resets the device\n");
		return;
	}

	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}

static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
	if (features & ~VALID_FEATURES_MASK)
		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
			       features & ~VALID_FEATURES_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ?
"sets" : "reads"); 224 if (!features) 225 mlx5_vdpa_info(mvdev, "all feature bits are cleared\n"); 226 227 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM); 228 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM); 229 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 230 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU); 231 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC); 232 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4); 233 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6); 234 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN); 235 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO); 236 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4); 237 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6); 238 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN); 239 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO); 240 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF); 241 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS); 242 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ); 243 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX); 244 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN); 245 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA); 246 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE); 247 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ); 248 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR); 249 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT); 250 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS); 251 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT); 252 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY); 253 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX); 254 MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY); 255 MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT); 256 MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1); 257 MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM); 258 MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED); 259 MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM); 260 MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV); 261 } 262 263 static int create_tis(struct mlx5_vdpa_net *ndev) 264 { 265 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 266 u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; 267 void *tisc; 268 int err; 269 270 tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); 271 MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn); 272 err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn); 273 if (err) 274 mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err); 275 276 return err; 277 } 278 279 static void destroy_tis(struct mlx5_vdpa_net *ndev) 280 { 281 mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn); 282 } 283 284 #define MLX5_VDPA_CQE_SIZE 64 285 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE) 286 287 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent) 288 { 289 struct mlx5_frag_buf *frag_buf = &buf->frag_buf; 290 u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE; 291 u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE; 292 int err; 293 294 err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf, 295 ndev->mvdev.mdev->priv.numa_node); 296 if (err) 297 return err; 298 299 mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); 300 301 buf->cqe_size = MLX5_VDPA_CQE_SIZE; 302 buf->nent = nent; 303 304 return 0; 305 } 306 307 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size) 308 { 309 struct mlx5_frag_buf *frag_buf = &umem->frag_buf; 310 311 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf, 312 ndev->mvdev.mdev->priv.numa_node); 313 } 314 315 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf) 316 { 317 mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf); 318 } 319 320 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n) 321 { 322 return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n); 323 } 324 325 static 
void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf) 326 { 327 struct mlx5_cqe64 *cqe64; 328 void *cqe; 329 int i; 330 331 for (i = 0; i < buf->nent; i++) { 332 cqe = get_cqe(vcq, i); 333 cqe64 = cqe; 334 cqe64->op_own = MLX5_CQE_INVALID << 4; 335 } 336 } 337 338 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n) 339 { 340 struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1)); 341 342 if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && 343 !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe))) 344 return cqe64; 345 346 return NULL; 347 } 348 349 static void rx_post(struct mlx5_vdpa_qp *vqp, int n) 350 { 351 vqp->head += n; 352 vqp->db.db[0] = cpu_to_be32(vqp->head); 353 } 354 355 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in, 356 struct mlx5_vdpa_virtqueue *mvq, u32 num_ent) 357 { 358 struct mlx5_vdpa_qp *vqp; 359 __be64 *pas; 360 void *qpc; 361 362 vqp = fw ? &mvq->fwqp : &mvq->vqqp; 363 MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid); 364 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 365 if (vqp->fw) { 366 /* Firmware QP is allocated by the driver for the firmware's 367 * use so we can skip part of the params as they will be chosen by firmware 368 */ 369 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 370 MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ); 371 MLX5_SET(qpc, qpc, no_sq, 1); 372 return; 373 } 374 375 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); 376 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); 377 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); 378 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 379 MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index); 380 MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 381 MLX5_SET(qpc, qpc, no_sq, 1); 382 MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn); 383 MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent)); 384 MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); 385 pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas); 386 mlx5_fill_page_frag_array(&vqp->frag_buf, pas); 387 } 388 389 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent) 390 { 391 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, 392 num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf, 393 ndev->mvdev.mdev->priv.numa_node); 394 } 395 396 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) 397 { 398 mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf); 399 } 400 401 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 402 struct mlx5_vdpa_qp *vqp) 403 { 404 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 405 int inlen = MLX5_ST_SZ_BYTES(create_qp_in); 406 u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; 407 void *qpc; 408 void *in; 409 int err; 410 411 if (!vqp->fw) { 412 vqp = &mvq->vqqp; 413 err = rq_buf_alloc(ndev, vqp, mvq->num_ent); 414 if (err) 415 return err; 416 417 err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db); 418 if (err) 419 goto err_db; 420 inlen += vqp->frag_buf.npages * sizeof(__be64); 421 } 422 423 in = kzalloc(inlen, GFP_KERNEL); 424 if (!in) { 425 err = -ENOMEM; 426 goto err_kzalloc; 427 } 428 429 qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent); 430 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 431 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); 432 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); 433 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); 434 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 435 if (!vqp->fw) 436 MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma); 437 
	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (err)
		goto err_kzalloc;

	vqp->mqp.uid = ndev->mvdev.res.uid;
	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

	if (!vqp->fw)
		rx_post(vqp, mvq->num_ent);

	return 0;

err_kzalloc:
	if (!vqp->fw)
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
	if (!vqp->fw)
		rq_buf_free(ndev, vqp);

	return err;
}

static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
	if (!vqp->fw) {
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
		rq_buf_free(ndev, vqp);
	}
}

static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	struct mlx5_cqe64 *cqe64;

	cqe64 = next_cqe_sw(vcq);
	if (!cqe64)
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	struct vdpa_callback *event_cb;

	event_cb = &ndev->event_cbs[mvq->index];
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* make sure the CQ consumer update is visible to the hardware before
	 * updating the RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (event_cb->callback)
		event_cb->callback(event_cb->private);
}

static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record. We also let the vdpa
			 * core know about this so it can pass it on to the
			 * virtio driver in the guest.
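			 * rx_post() credits the consumed entries back to the
			 * driver-side receive queue so the firmware QP can
			 * continue delivering notifications.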
525 */ 526 mlx5_vdpa_handle_completions(mvq, num); 527 num = 0; 528 } 529 } 530 531 if (num) 532 mlx5_vdpa_handle_completions(mvq, num); 533 534 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); 535 } 536 537 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) 538 { 539 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; 540 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 541 void __iomem *uar_page = ndev->mvdev.res.uar->map; 542 u32 out[MLX5_ST_SZ_DW(create_cq_out)]; 543 struct mlx5_vdpa_cq *vcq = &mvq->cq; 544 __be64 *pas; 545 int inlen; 546 void *cqc; 547 void *in; 548 int err; 549 int eqn; 550 551 err = mlx5_db_alloc(mdev, &vcq->db); 552 if (err) 553 return err; 554 555 vcq->mcq.set_ci_db = vcq->db.db; 556 vcq->mcq.arm_db = vcq->db.db + 1; 557 vcq->mcq.cqe_sz = 64; 558 559 err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent); 560 if (err) 561 goto err_db; 562 563 cq_frag_buf_init(vcq, &vcq->buf); 564 565 inlen = MLX5_ST_SZ_BYTES(create_cq_in) + 566 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages; 567 in = kzalloc(inlen, GFP_KERNEL); 568 if (!in) { 569 err = -ENOMEM; 570 goto err_vzalloc; 571 } 572 573 MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid); 574 pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); 575 mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas); 576 577 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 578 MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 579 580 /* Use vector 0 by default. Consider adding code to choose least used 581 * vector. 582 */ 583 err = mlx5_comp_eqn_get(mdev, 0, &eqn); 584 if (err) 585 goto err_vec; 586 587 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 588 MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent)); 589 MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index); 590 MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); 591 MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma); 592 593 err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out)); 594 if (err) 595 goto err_vec; 596 597 vcq->mcq.comp = mlx5_vdpa_cq_comp; 598 vcq->cqe = num_ent; 599 vcq->mcq.set_ci_db = vcq->db.db; 600 vcq->mcq.arm_db = vcq->db.db + 1; 601 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); 602 kfree(in); 603 return 0; 604 605 err_vec: 606 kfree(in); 607 err_vzalloc: 608 cq_frag_buf_free(ndev, &vcq->buf); 609 err_db: 610 mlx5_db_free(ndev->mvdev.mdev, &vcq->db); 611 return err; 612 } 613 614 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx) 615 { 616 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; 617 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 618 struct mlx5_vdpa_cq *vcq = &mvq->cq; 619 620 if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) { 621 mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn); 622 return; 623 } 624 cq_frag_buf_free(ndev, &vcq->buf); 625 mlx5_db_free(ndev->mvdev.mdev, &vcq->db); 626 } 627 628 static int read_umem_params(struct mlx5_vdpa_net *ndev) 629 { 630 u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; 631 u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01); 632 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 633 int out_size; 634 void *caps; 635 void *out; 636 int err; 637 638 out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); 639 out = kzalloc(out_size, GFP_KERNEL); 640 if (!out) 641 return -ENOMEM; 642 643 MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); 644 MLX5_SET(query_hca_cap_in, in, op_mod, opmod); 645 err = mlx5_cmd_exec_inout(mdev, 
query_hca_cap, in, out); 646 if (err) { 647 mlx5_vdpa_warn(&ndev->mvdev, 648 "Failed reading vdpa umem capabilities with err %d\n", err); 649 goto out; 650 } 651 652 caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability); 653 654 ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a); 655 ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b); 656 657 ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a); 658 ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b); 659 660 ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a); 661 ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b); 662 663 out: 664 kfree(out); 665 return 0; 666 } 667 668 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num, 669 struct mlx5_vdpa_umem **umemp) 670 { 671 u32 p_a; 672 u32 p_b; 673 674 switch (num) { 675 case 1: 676 p_a = ndev->umem_1_buffer_param_a; 677 p_b = ndev->umem_1_buffer_param_b; 678 *umemp = &mvq->umem1; 679 break; 680 case 2: 681 p_a = ndev->umem_2_buffer_param_a; 682 p_b = ndev->umem_2_buffer_param_b; 683 *umemp = &mvq->umem2; 684 break; 685 case 3: 686 p_a = ndev->umem_3_buffer_param_a; 687 p_b = ndev->umem_3_buffer_param_b; 688 *umemp = &mvq->umem3; 689 break; 690 } 691 692 (*umemp)->size = p_a * mvq->num_ent + p_b; 693 } 694 695 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem) 696 { 697 mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf); 698 } 699 700 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) 701 { 702 int inlen; 703 u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {}; 704 void *um; 705 void *in; 706 int err; 707 __be64 *pas; 708 struct mlx5_vdpa_umem *umem; 709 710 set_umem_size(ndev, mvq, num, &umem); 711 err = umem_frag_buf_alloc(ndev, umem, umem->size); 712 if (err) 713 return err; 714 715 inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages; 716 717 in = kzalloc(inlen, GFP_KERNEL); 718 if (!in) { 719 err = -ENOMEM; 720 goto err_in; 721 } 722 723 MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM); 724 MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid); 725 um = MLX5_ADDR_OF(create_umem_in, in, umem); 726 MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 727 MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages); 728 729 pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]); 730 mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW); 731 732 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 733 if (err) { 734 mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err); 735 goto err_cmd; 736 } 737 738 kfree(in); 739 umem->id = MLX5_GET(create_umem_out, out, umem_id); 740 741 return 0; 742 743 err_cmd: 744 kfree(in); 745 err_in: 746 umem_frag_buf_free(ndev, umem); 747 return err; 748 } 749 750 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) 751 { 752 u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {}; 753 u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {}; 754 struct mlx5_vdpa_umem *umem; 755 756 switch (num) { 757 case 1: 758 umem = &mvq->umem1; 759 break; 760 case 2: 761 umem = &mvq->umem2; 762 break; 763 case 3: 764 umem = &mvq->umem3; 765 break; 766 } 767 768 MLX5_SET(destroy_umem_in, in, opcode, 
MLX5_CMD_OP_DESTROY_UMEM); 769 MLX5_SET(destroy_umem_in, in, umem_id, umem->id); 770 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) 771 return; 772 773 umem_frag_buf_free(ndev, umem); 774 } 775 776 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 777 { 778 int num; 779 int err; 780 781 for (num = 1; num <= 3; num++) { 782 err = create_umem(ndev, mvq, num); 783 if (err) 784 goto err_umem; 785 } 786 return 0; 787 788 err_umem: 789 for (num--; num > 0; num--) 790 umem_destroy(ndev, mvq, num); 791 792 return err; 793 } 794 795 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 796 { 797 int num; 798 799 for (num = 3; num > 0; num--) 800 umem_destroy(ndev, mvq, num); 801 } 802 803 static int get_queue_type(struct mlx5_vdpa_net *ndev) 804 { 805 u32 type_mask; 806 807 type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type); 808 809 /* prefer split queue */ 810 if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT) 811 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT; 812 813 WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED)); 814 815 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED; 816 } 817 818 static bool vq_is_tx(u16 idx) 819 { 820 return idx % 2; 821 } 822 823 enum { 824 MLX5_VIRTIO_NET_F_MRG_RXBUF = 2, 825 MLX5_VIRTIO_NET_F_HOST_ECN = 4, 826 MLX5_VIRTIO_NET_F_GUEST_ECN = 6, 827 MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7, 828 MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8, 829 MLX5_VIRTIO_NET_F_GUEST_CSUM = 9, 830 MLX5_VIRTIO_NET_F_CSUM = 10, 831 MLX5_VIRTIO_NET_F_HOST_TSO6 = 11, 832 MLX5_VIRTIO_NET_F_HOST_TSO4 = 12, 833 }; 834 835 static u16 get_features(u64 features) 836 { 837 return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) | 838 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) | 839 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) | 840 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) | 841 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) | 842 (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) | 843 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) | 844 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4); 845 } 846 847 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev) 848 { 849 return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) & 850 BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 851 } 852 853 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev) 854 { 855 return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) & 856 (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) && 857 pci_msix_can_alloc_dyn(mvdev->mdev->pdev); 858 } 859 860 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 861 { 862 int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); 863 u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {}; 864 void *obj_context; 865 u16 mlx_features; 866 void *cmd_hdr; 867 void *vq_ctx; 868 void *in; 869 int err; 870 871 err = umems_create(ndev, mvq); 872 if (err) 873 return err; 874 875 in = kzalloc(inlen, GFP_KERNEL); 876 if (!in) { 877 err = -ENOMEM; 878 goto err_alloc; 879 } 880 881 mlx_features = get_features(ndev->mvdev.actual_features); 882 cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr); 883 884 MLX5_SET(general_obj_in_cmd_hdr, 
cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 885 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 886 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 887 888 obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context); 889 MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); 890 MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); 891 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, 892 mlx_features >> 3); 893 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0, 894 mlx_features & 7); 895 vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); 896 MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev)); 897 898 if (vq_is_tx(mvq->index)) 899 MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn); 900 901 if (mvq->map.virq) { 902 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE); 903 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index); 904 } else { 905 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE); 906 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); 907 } 908 909 MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index); 910 MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent); 911 MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 912 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1))); 913 MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); 914 MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); 915 MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); 916 MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey); 917 MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id); 918 MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size); 919 MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id); 920 MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size); 921 MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id); 922 MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size); 923 MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); 924 if (counters_supported(&ndev->mvdev)) 925 MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id); 926 927 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 928 if (err) 929 goto err_cmd; 930 931 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT; 932 kfree(in); 933 mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 934 935 return 0; 936 937 err_cmd: 938 kfree(in); 939 err_alloc: 940 umems_destroy(ndev, mvq); 941 return err; 942 } 943 944 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 945 { 946 u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {}; 947 u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {}; 948 949 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode, 950 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 951 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id); 952 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid); 953 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type, 954 MLX5_OBJ_TYPE_VIRTIO_NET_Q); 955 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) { 956 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id); 957 return; 958 } 959 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; 960 umems_destroy(ndev, mvq); 961 } 962 963 static u32 
get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) 964 { 965 return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn; 966 } 967 968 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) 969 { 970 return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn; 971 } 972 973 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out, 974 int *outlen, u32 qpn, u32 rqpn) 975 { 976 void *qpc; 977 void *pp; 978 979 switch (cmd) { 980 case MLX5_CMD_OP_2RST_QP: 981 *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in); 982 *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out); 983 *in = kzalloc(*inlen, GFP_KERNEL); 984 *out = kzalloc(*outlen, GFP_KERNEL); 985 if (!*in || !*out) 986 goto outerr; 987 988 MLX5_SET(qp_2rst_in, *in, opcode, cmd); 989 MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid); 990 MLX5_SET(qp_2rst_in, *in, qpn, qpn); 991 break; 992 case MLX5_CMD_OP_RST2INIT_QP: 993 *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in); 994 *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out); 995 *in = kzalloc(*inlen, GFP_KERNEL); 996 *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL); 997 if (!*in || !*out) 998 goto outerr; 999 1000 MLX5_SET(rst2init_qp_in, *in, opcode, cmd); 1001 MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid); 1002 MLX5_SET(rst2init_qp_in, *in, qpn, qpn); 1003 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 1004 MLX5_SET(qpc, qpc, remote_qpn, rqpn); 1005 MLX5_SET(qpc, qpc, rwe, 1); 1006 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1007 MLX5_SET(ads, pp, vhca_port_num, 1); 1008 break; 1009 case MLX5_CMD_OP_INIT2RTR_QP: 1010 *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in); 1011 *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out); 1012 *in = kzalloc(*inlen, GFP_KERNEL); 1013 *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL); 1014 if (!*in || !*out) 1015 goto outerr; 1016 1017 MLX5_SET(init2rtr_qp_in, *in, opcode, cmd); 1018 MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid); 1019 MLX5_SET(init2rtr_qp_in, *in, qpn, qpn); 1020 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 1021 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 1022 MLX5_SET(qpc, qpc, log_msg_max, 30); 1023 MLX5_SET(qpc, qpc, remote_qpn, rqpn); 1024 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1025 MLX5_SET(ads, pp, fl, 1); 1026 break; 1027 case MLX5_CMD_OP_RTR2RTS_QP: 1028 *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in); 1029 *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out); 1030 *in = kzalloc(*inlen, GFP_KERNEL); 1031 *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL); 1032 if (!*in || !*out) 1033 goto outerr; 1034 1035 MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd); 1036 MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid); 1037 MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn); 1038 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 1039 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1040 MLX5_SET(ads, pp, ack_timeout, 14); 1041 MLX5_SET(qpc, qpc, retry_count, 7); 1042 MLX5_SET(qpc, qpc, rnr_retry, 7); 1043 break; 1044 default: 1045 goto outerr_nullify; 1046 } 1047 1048 return; 1049 1050 outerr: 1051 kfree(*in); 1052 kfree(*out); 1053 outerr_nullify: 1054 *in = NULL; 1055 *out = NULL; 1056 } 1057 1058 static void free_inout(void *in, void *out) 1059 { 1060 kfree(in); 1061 kfree(out); 1062 } 1063 1064 /* Two QPs are used by each virtqueue. One is used by the driver and one by 1065 * firmware. The fw argument indicates whether the subjected QP is the one used 1066 * by firmware. 
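 *
 * connect_qps() drives both QPs from RESET through INIT and RTR with these
 * commands, and finally moves the firmware QP to RTS, establishing the RC
 * connection used for queue notifications.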
1067 */ 1068 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd) 1069 { 1070 int outlen; 1071 int inlen; 1072 void *out; 1073 void *in; 1074 int err; 1075 1076 alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw)); 1077 if (!in || !out) 1078 return -ENOMEM; 1079 1080 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen); 1081 free_inout(in, out); 1082 return err; 1083 } 1084 1085 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1086 { 1087 int err; 1088 1089 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP); 1090 if (err) 1091 return err; 1092 1093 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP); 1094 if (err) 1095 return err; 1096 1097 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP); 1098 if (err) 1099 return err; 1100 1101 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP); 1102 if (err) 1103 return err; 1104 1105 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP); 1106 if (err) 1107 return err; 1108 1109 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP); 1110 if (err) 1111 return err; 1112 1113 return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP); 1114 } 1115 1116 struct mlx5_virtq_attr { 1117 u8 state; 1118 u16 available_index; 1119 u16 used_index; 1120 }; 1121 1122 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 1123 struct mlx5_virtq_attr *attr) 1124 { 1125 int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out); 1126 u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {}; 1127 void *out; 1128 void *obj_context; 1129 void *cmd_hdr; 1130 int err; 1131 1132 out = kzalloc(outlen, GFP_KERNEL); 1133 if (!out) 1134 return -ENOMEM; 1135 1136 cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr); 1137 1138 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 1139 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 1140 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); 1141 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1142 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen); 1143 if (err) 1144 goto err_cmd; 1145 1146 obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context); 1147 memset(attr, 0, sizeof(*attr)); 1148 attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); 1149 attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); 1150 attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index); 1151 kfree(out); 1152 return 0; 1153 1154 err_cmd: 1155 kfree(out); 1156 return err; 1157 } 1158 1159 static bool is_valid_state_change(int oldstate, int newstate) 1160 { 1161 switch (oldstate) { 1162 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT: 1163 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY; 1164 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY: 1165 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; 1166 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: 1167 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR: 1168 default: 1169 return false; 1170 } 1171 } 1172 1173 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state) 1174 { 1175 int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); 1176 u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {}; 1177 void *obj_context; 1178 void *cmd_hdr; 1179 void *in; 1180 int err; 1181 1182 if (mvq->fw_state == 
MLX5_VIRTIO_NET_Q_OBJECT_NONE) 1183 return 0; 1184 1185 if (!is_valid_state_change(mvq->fw_state, state)) 1186 return -EINVAL; 1187 1188 in = kzalloc(inlen, GFP_KERNEL); 1189 if (!in) 1190 return -ENOMEM; 1191 1192 cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr); 1193 1194 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); 1195 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 1196 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); 1197 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1198 1199 obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context); 1200 MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, 1201 MLX5_VIRTQ_MODIFY_MASK_STATE); 1202 MLX5_SET(virtio_net_q_object, obj_context, state, state); 1203 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 1204 kfree(in); 1205 if (!err) 1206 mvq->fw_state = state; 1207 1208 return err; 1209 } 1210 1211 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1212 { 1213 u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {}; 1214 u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {}; 1215 void *cmd_hdr; 1216 int err; 1217 1218 if (!counters_supported(&ndev->mvdev)) 1219 return 0; 1220 1221 cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr); 1222 1223 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 1224 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 1225 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1226 1227 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 1228 if (err) 1229 return err; 1230 1231 mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 1232 1233 return 0; 1234 } 1235 1236 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1237 { 1238 u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {}; 1239 u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {}; 1240 1241 if (!counters_supported(&ndev->mvdev)) 1242 return; 1243 1244 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 1245 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id); 1246 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid); 1247 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 1248 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) 1249 mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id); 1250 } 1251 1252 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv) 1253 { 1254 struct vdpa_callback *cb = priv; 1255 1256 if (cb->callback) 1257 return cb->callback(cb->private); 1258 1259 return IRQ_HANDLED; 1260 } 1261 1262 static void alloc_vector(struct mlx5_vdpa_net *ndev, 1263 struct mlx5_vdpa_virtqueue *mvq) 1264 { 1265 struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; 1266 struct mlx5_vdpa_irq_pool_entry *ent; 1267 int err; 1268 int i; 1269 1270 for (i = 0; i < irqp->num_ent; i++) { 1271 ent = &irqp->entries[i]; 1272 if (!ent->used) { 1273 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", 1274 dev_name(&ndev->mvdev.vdev.dev), mvq->index); 1275 ent->dev_id = &ndev->event_cbs[mvq->index]; 1276 err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0, 1277 ent->name, ent->dev_id); 1278 if 
(err) 1279 return; 1280 1281 ent->used = true; 1282 mvq->map = ent->map; 1283 return; 1284 } 1285 } 1286 } 1287 1288 static void dealloc_vector(struct mlx5_vdpa_net *ndev, 1289 struct mlx5_vdpa_virtqueue *mvq) 1290 { 1291 struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; 1292 int i; 1293 1294 for (i = 0; i < irqp->num_ent; i++) 1295 if (mvq->map.virq == irqp->entries[i].map.virq) { 1296 free_irq(mvq->map.virq, irqp->entries[i].dev_id); 1297 irqp->entries[i].used = false; 1298 return; 1299 } 1300 } 1301 1302 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1303 { 1304 u16 idx = mvq->index; 1305 int err; 1306 1307 if (!mvq->num_ent) 1308 return 0; 1309 1310 if (mvq->initialized) 1311 return 0; 1312 1313 err = cq_create(ndev, idx, mvq->num_ent); 1314 if (err) 1315 return err; 1316 1317 err = qp_create(ndev, mvq, &mvq->fwqp); 1318 if (err) 1319 goto err_fwqp; 1320 1321 err = qp_create(ndev, mvq, &mvq->vqqp); 1322 if (err) 1323 goto err_vqqp; 1324 1325 err = connect_qps(ndev, mvq); 1326 if (err) 1327 goto err_connect; 1328 1329 err = counter_set_alloc(ndev, mvq); 1330 if (err) 1331 goto err_connect; 1332 1333 alloc_vector(ndev, mvq); 1334 err = create_virtqueue(ndev, mvq); 1335 if (err) 1336 goto err_vq; 1337 1338 if (mvq->ready) { 1339 err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); 1340 if (err) { 1341 mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n", 1342 idx, err); 1343 goto err_modify; 1344 } 1345 } 1346 1347 mvq->initialized = true; 1348 return 0; 1349 1350 err_modify: 1351 destroy_virtqueue(ndev, mvq); 1352 err_vq: 1353 dealloc_vector(ndev, mvq); 1354 counter_set_dealloc(ndev, mvq); 1355 err_connect: 1356 qp_destroy(ndev, &mvq->vqqp); 1357 err_vqqp: 1358 qp_destroy(ndev, &mvq->fwqp); 1359 err_fwqp: 1360 cq_destroy(ndev, idx); 1361 return err; 1362 } 1363 1364 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1365 { 1366 struct mlx5_virtq_attr attr; 1367 1368 if (!mvq->initialized) 1369 return; 1370 1371 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 1372 return; 1373 1374 if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) 1375 mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n"); 1376 1377 if (query_virtqueue(ndev, mvq, &attr)) { 1378 mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n"); 1379 return; 1380 } 1381 mvq->avail_idx = attr.available_index; 1382 mvq->used_idx = attr.used_index; 1383 } 1384 1385 static void suspend_vqs(struct mlx5_vdpa_net *ndev) 1386 { 1387 int i; 1388 1389 for (i = 0; i < ndev->mvdev.max_vqs; i++) 1390 suspend_vq(ndev, &ndev->vqs[i]); 1391 } 1392 1393 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1394 { 1395 if (!mvq->initialized) 1396 return; 1397 1398 suspend_vq(ndev, mvq); 1399 destroy_virtqueue(ndev, mvq); 1400 dealloc_vector(ndev, mvq); 1401 counter_set_dealloc(ndev, mvq); 1402 qp_destroy(ndev, &mvq->vqqp); 1403 qp_destroy(ndev, &mvq->fwqp); 1404 cq_destroy(ndev, mvq->index); 1405 mvq->initialized = false; 1406 } 1407 1408 static int create_rqt(struct mlx5_vdpa_net *ndev) 1409 { 1410 int rqt_table_size = roundup_pow_of_two(ndev->rqt_size); 1411 int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2); 1412 __be32 *list; 1413 void *rqtc; 1414 int inlen; 1415 void *in; 1416 int i, j; 1417 int err; 1418 1419 inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num); 1420 in = kzalloc(inlen, GFP_KERNEL); 1421 if (!in) 1422 return -ENOMEM; 1423 
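	/* The indirection table lists only the receive (even-indexed)
	 * virtqueues. Because the actual size below is rounded up to a power
	 * of two, entries wrap around modulo the number of active virtqueues.
	 */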
1424 MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid); 1425 rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); 1426 1427 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); 1428 MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size); 1429 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); 1430 for (i = 0, j = 0; i < act_sz; i++, j += 2) 1431 list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id); 1432 1433 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz); 1434 err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn); 1435 kfree(in); 1436 if (err) 1437 return err; 1438 1439 return 0; 1440 } 1441 1442 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1) 1443 1444 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num) 1445 { 1446 int act_sz = roundup_pow_of_two(num / 2); 1447 __be32 *list; 1448 void *rqtc; 1449 int inlen; 1450 void *in; 1451 int i, j; 1452 int err; 1453 1454 inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num); 1455 in = kzalloc(inlen, GFP_KERNEL); 1456 if (!in) 1457 return -ENOMEM; 1458 1459 MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid); 1460 MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS); 1461 rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); 1462 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); 1463 1464 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); 1465 for (i = 0, j = 0; i < act_sz; i++, j = j + 2) 1466 list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id); 1467 1468 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz); 1469 err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn); 1470 kfree(in); 1471 if (err) 1472 return err; 1473 1474 return 0; 1475 } 1476 1477 static void destroy_rqt(struct mlx5_vdpa_net *ndev) 1478 { 1479 mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn); 1480 } 1481 1482 static int create_tir(struct mlx5_vdpa_net *ndev) 1483 { 1484 #define HASH_IP_L4PORTS \ 1485 (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \ 1486 MLX5_HASH_FIELD_SEL_L4_DPORT) 1487 static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7, 1488 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94, 1489 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1, 1490 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59, 1491 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a }; 1492 void *rss_key; 1493 void *outer; 1494 void *tirc; 1495 void *in; 1496 int err; 1497 1498 in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL); 1499 if (!in) 1500 return -ENOMEM; 1501 1502 MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid); 1503 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); 1504 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); 1505 1506 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); 1507 MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); 1508 rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); 1509 memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key)); 1510 1511 outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); 1512 MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); 1513 MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); 1514 MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS); 1515 1516 MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn); 1517 MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn); 1518 1519 err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn); 1520 kfree(in); 1521 if (err) 1522 return err; 1523 
1524 mlx5_vdpa_add_tirn(ndev); 1525 return err; 1526 } 1527 1528 static void destroy_tir(struct mlx5_vdpa_net *ndev) 1529 { 1530 mlx5_vdpa_remove_tirn(ndev); 1531 mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn); 1532 } 1533 1534 #define MAX_STEERING_ENT 0x8000 1535 #define MAX_STEERING_GROUPS 2 1536 1537 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1538 #define NUM_DESTS 2 1539 #else 1540 #define NUM_DESTS 1 1541 #endif 1542 1543 static int add_steering_counters(struct mlx5_vdpa_net *ndev, 1544 struct macvlan_node *node, 1545 struct mlx5_flow_act *flow_act, 1546 struct mlx5_flow_destination *dests) 1547 { 1548 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1549 int err; 1550 1551 node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false); 1552 if (IS_ERR(node->ucast_counter.counter)) 1553 return PTR_ERR(node->ucast_counter.counter); 1554 1555 node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false); 1556 if (IS_ERR(node->mcast_counter.counter)) { 1557 err = PTR_ERR(node->mcast_counter.counter); 1558 goto err_mcast_counter; 1559 } 1560 1561 dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; 1562 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 1563 return 0; 1564 1565 err_mcast_counter: 1566 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter); 1567 return err; 1568 #else 1569 return 0; 1570 #endif 1571 } 1572 1573 static void remove_steering_counters(struct mlx5_vdpa_net *ndev, 1574 struct macvlan_node *node) 1575 { 1576 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1577 mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter); 1578 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter); 1579 #endif 1580 } 1581 1582 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, 1583 struct macvlan_node *node) 1584 { 1585 struct mlx5_flow_destination dests[NUM_DESTS] = {}; 1586 struct mlx5_flow_act flow_act = {}; 1587 struct mlx5_flow_spec *spec; 1588 void *headers_c; 1589 void *headers_v; 1590 u8 *dmac_c; 1591 u8 *dmac_v; 1592 int err; 1593 u16 vid; 1594 1595 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 1596 if (!spec) 1597 return -ENOMEM; 1598 1599 vid = key2vid(node->macvlan); 1600 spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; 1601 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); 1602 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); 1603 dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); 1604 dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); 1605 eth_broadcast_addr(dmac_c); 1606 ether_addr_copy(dmac_v, mac); 1607 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) { 1608 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); 1609 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); 1610 } 1611 if (node->tagged) { 1612 MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); 1613 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid); 1614 } 1615 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 1616 dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; 1617 dests[0].tir_num = ndev->res.tirn; 1618 err = add_steering_counters(ndev, node, &flow_act, dests); 1619 if (err) 1620 goto out_free; 1621 1622 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1623 dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter); 1624 #endif 1625 node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); 1626 if (IS_ERR(node->ucast_rule)) { 1627 err = 
PTR_ERR(node->ucast_rule); 1628 goto err_ucast; 1629 } 1630 1631 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1632 dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter); 1633 #endif 1634 1635 memset(dmac_c, 0, ETH_ALEN); 1636 memset(dmac_v, 0, ETH_ALEN); 1637 dmac_c[0] = 1; 1638 dmac_v[0] = 1; 1639 node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); 1640 if (IS_ERR(node->mcast_rule)) { 1641 err = PTR_ERR(node->mcast_rule); 1642 goto err_mcast; 1643 } 1644 kvfree(spec); 1645 mlx5_vdpa_add_rx_counters(ndev, node); 1646 return 0; 1647 1648 err_mcast: 1649 mlx5_del_flow_rules(node->ucast_rule); 1650 err_ucast: 1651 remove_steering_counters(ndev, node); 1652 out_free: 1653 kvfree(spec); 1654 return err; 1655 } 1656 1657 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev, 1658 struct macvlan_node *node) 1659 { 1660 mlx5_vdpa_remove_rx_counters(ndev, node); 1661 mlx5_del_flow_rules(node->ucast_rule); 1662 mlx5_del_flow_rules(node->mcast_rule); 1663 } 1664 1665 static u64 search_val(u8 *mac, u16 vlan, bool tagged) 1666 { 1667 u64 val; 1668 1669 if (!tagged) 1670 vlan = MLX5V_UNTAGGED; 1671 1672 val = (u64)vlan << 48 | 1673 (u64)mac[0] << 40 | 1674 (u64)mac[1] << 32 | 1675 (u64)mac[2] << 24 | 1676 (u64)mac[3] << 16 | 1677 (u64)mac[4] << 8 | 1678 (u64)mac[5]; 1679 1680 return val; 1681 } 1682 1683 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value) 1684 { 1685 struct macvlan_node *pos; 1686 u32 idx; 1687 1688 idx = hash_64(value, 8); // tbd 8 1689 hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) { 1690 if (pos->macvlan == value) 1691 return pos; 1692 } 1693 return NULL; 1694 } 1695 1696 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged) 1697 { 1698 struct macvlan_node *ptr; 1699 u64 val; 1700 u32 idx; 1701 int err; 1702 1703 val = search_val(mac, vid, tagged); 1704 if (mac_vlan_lookup(ndev, val)) 1705 return -EEXIST; 1706 1707 ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); 1708 if (!ptr) 1709 return -ENOMEM; 1710 1711 ptr->tagged = tagged; 1712 ptr->macvlan = val; 1713 ptr->ndev = ndev; 1714 err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr); 1715 if (err) 1716 goto err_add; 1717 1718 idx = hash_64(val, 8); 1719 hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]); 1720 return 0; 1721 1722 err_add: 1723 kfree(ptr); 1724 return err; 1725 } 1726 1727 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) 1728 { 1729 struct macvlan_node *ptr; 1730 1731 ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged)); 1732 if (!ptr) 1733 return; 1734 1735 hlist_del(&ptr->hlist); 1736 mlx5_vdpa_del_mac_vlan_rules(ndev, ptr); 1737 remove_steering_counters(ndev, ptr); 1738 kfree(ptr); 1739 } 1740 1741 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev) 1742 { 1743 struct macvlan_node *pos; 1744 struct hlist_node *n; 1745 int i; 1746 1747 for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) { 1748 hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) { 1749 hlist_del(&pos->hlist); 1750 mlx5_vdpa_del_mac_vlan_rules(ndev, pos); 1751 remove_steering_counters(ndev, pos); 1752 kfree(pos); 1753 } 1754 } 1755 } 1756 1757 static int setup_steering(struct mlx5_vdpa_net *ndev) 1758 { 1759 struct mlx5_flow_table_attr ft_attr = {}; 1760 struct mlx5_flow_namespace *ns; 1761 int err; 1762 1763 ft_attr.max_fte = MAX_STEERING_ENT; 1764 ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS; 1765 1766 ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, 
					     MLX5_FLOW_NAMESPACE_BYPASS);
	if (!ns) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
		return -EOPNOTSUPP;
	}

	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(ndev->rxft)) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
		return PTR_ERR(ndev->rxft);
	}
	mlx5_vdpa_add_rx_flow_table(ndev);

	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
	if (err)
		goto err_add;

	return 0;

err_add:
	mlx5_vdpa_remove_rx_flow_table(ndev);
	mlx5_destroy_flow_table(ndev->rxft);
	return err;
}

static void teardown_steering(struct mlx5_vdpa_net *ndev)
{
	clear_mac_vlan_table(ndev);
	mlx5_vdpa_remove_rx_flow_table(ndev);
	mlx5_destroy_flow_table(ndev->rxft);
}

static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct mlx5_core_dev *pfmdev;
	size_t read;
	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];

	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
	switch (cmd) {
	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
		if (read != ETH_ALEN)
			break;

		if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
			status = VIRTIO_NET_OK;
			break;
		}

		if (is_zero_ether_addr(mac))
			break;

		if (!is_zero_ether_addr(ndev->config.mac)) {
			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
					       ndev->config.mac);
				break;
			}
		}

		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
				       mac);
			break;
		}

		/* Back up the original MAC address so that it can be restored
		 * if adding the forward rules fails.
		 */
		memcpy(mac_back, ndev->config.mac, ETH_ALEN);

		memcpy(ndev->config.mac, mac, ETH_ALEN);

		/* The flow table entry must be recreated so that packets
		 * destined to the new MAC are forwarded again.
		 */
		mac_vlan_del(ndev, mac_back, 0, false);

		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");

			/* Although this is unlikely to happen, we still need to
			 * double-check.
			 */
			if (is_zero_ether_addr(mac_back)) {
				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
				break;
			}

			/* Try to restore the original MAC address to the MPFS
			 * table, and try to restore the forward rule entry.
1858 */ 1859 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) { 1860 mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n", 1861 ndev->config.mac); 1862 } 1863 1864 if (mlx5_mpfs_add_mac(pfmdev, mac_back)) { 1865 mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n", 1866 mac_back); 1867 } 1868 1869 memcpy(ndev->config.mac, mac_back, ETH_ALEN); 1870 1871 if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) 1872 mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n"); 1873 1874 break; 1875 } 1876 1877 status = VIRTIO_NET_OK; 1878 break; 1879 1880 default: 1881 break; 1882 } 1883 1884 return status; 1885 } 1886 1887 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps) 1888 { 1889 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1890 int cur_qps = ndev->cur_num_vqs / 2; 1891 int err; 1892 int i; 1893 1894 if (cur_qps > newqps) { 1895 err = modify_rqt(ndev, 2 * newqps); 1896 if (err) 1897 return err; 1898 1899 for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--) 1900 teardown_vq(ndev, &ndev->vqs[i]); 1901 1902 ndev->cur_num_vqs = 2 * newqps; 1903 } else { 1904 ndev->cur_num_vqs = 2 * newqps; 1905 for (i = cur_qps * 2; i < 2 * newqps; i++) { 1906 err = setup_vq(ndev, &ndev->vqs[i]); 1907 if (err) 1908 goto clean_added; 1909 } 1910 err = modify_rqt(ndev, 2 * newqps); 1911 if (err) 1912 goto clean_added; 1913 } 1914 return 0; 1915 1916 clean_added: 1917 for (--i; i >= 2 * cur_qps; --i) 1918 teardown_vq(ndev, &ndev->vqs[i]); 1919 1920 ndev->cur_num_vqs = 2 * cur_qps; 1921 1922 return err; 1923 } 1924 1925 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd) 1926 { 1927 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1928 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1929 struct mlx5_control_vq *cvq = &mvdev->cvq; 1930 struct virtio_net_ctrl_mq mq; 1931 size_t read; 1932 u16 newqps; 1933 1934 switch (cmd) { 1935 case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: 1936 /* This mq feature check aligns with pre-existing userspace 1937 * implementation. 1938 * 1939 * Without it, an untrusted driver could fake a multiqueue config 1940 * request down to a non-mq device that may cause kernel to 1941 * panic due to uninitialized resources for extra vqs. Even with 1942 * a well behaving guest driver, it is not expected to allow 1943 * changing the number of vqs on a non-mq device. 
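 * Hence the request is simply rejected (VIRTIO_NET_ERR) when _F_MQ was
 * not negotiated.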
1944 */ 1945 if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) 1946 break; 1947 1948 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq)); 1949 if (read != sizeof(mq)) 1950 break; 1951 1952 newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs); 1953 if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 1954 newqps > ndev->rqt_size) 1955 break; 1956 1957 if (ndev->cur_num_vqs == 2 * newqps) { 1958 status = VIRTIO_NET_OK; 1959 break; 1960 } 1961 1962 if (!change_num_qps(mvdev, newqps)) 1963 status = VIRTIO_NET_OK; 1964 1965 break; 1966 default: 1967 break; 1968 } 1969 1970 return status; 1971 } 1972 1973 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd) 1974 { 1975 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1976 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1977 struct mlx5_control_vq *cvq = &mvdev->cvq; 1978 __virtio16 vlan; 1979 size_t read; 1980 u16 id; 1981 1982 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN))) 1983 return status; 1984 1985 switch (cmd) { 1986 case VIRTIO_NET_CTRL_VLAN_ADD: 1987 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 1988 if (read != sizeof(vlan)) 1989 break; 1990 1991 id = mlx5vdpa16_to_cpu(mvdev, vlan); 1992 if (mac_vlan_add(ndev, ndev->config.mac, id, true)) 1993 break; 1994 1995 status = VIRTIO_NET_OK; 1996 break; 1997 case VIRTIO_NET_CTRL_VLAN_DEL: 1998 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 1999 if (read != sizeof(vlan)) 2000 break; 2001 2002 id = mlx5vdpa16_to_cpu(mvdev, vlan); 2003 mac_vlan_del(ndev, ndev->config.mac, id, true); 2004 status = VIRTIO_NET_OK; 2005 break; 2006 default: 2007 break; 2008 } 2009 2010 return status; 2011 } 2012 2013 static void mlx5_cvq_kick_handler(struct work_struct *work) 2014 { 2015 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 2016 struct virtio_net_ctrl_hdr ctrl; 2017 struct mlx5_vdpa_wq_ent *wqent; 2018 struct mlx5_vdpa_dev *mvdev; 2019 struct mlx5_control_vq *cvq; 2020 struct mlx5_vdpa_net *ndev; 2021 size_t read, write; 2022 int err; 2023 2024 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 2025 mvdev = wqent->mvdev; 2026 ndev = to_mlx5_vdpa_ndev(mvdev); 2027 cvq = &mvdev->cvq; 2028 2029 down_write(&ndev->reslock); 2030 2031 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 2032 goto out; 2033 2034 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 2035 goto out; 2036 2037 if (!cvq->ready) 2038 goto out; 2039 2040 while (true) { 2041 err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head, 2042 GFP_ATOMIC); 2043 if (err <= 0) 2044 break; 2045 2046 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl)); 2047 if (read != sizeof(ctrl)) 2048 break; 2049 2050 cvq->received_desc++; 2051 switch (ctrl.class) { 2052 case VIRTIO_NET_CTRL_MAC: 2053 status = handle_ctrl_mac(mvdev, ctrl.cmd); 2054 break; 2055 case VIRTIO_NET_CTRL_MQ: 2056 status = handle_ctrl_mq(mvdev, ctrl.cmd); 2057 break; 2058 case VIRTIO_NET_CTRL_VLAN: 2059 status = handle_ctrl_vlan(mvdev, ctrl.cmd); 2060 break; 2061 default: 2062 break; 2063 } 2064 2065 /* Make sure data is written before advancing index */ 2066 smp_wmb(); 2067 2068 write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status)); 2069 vringh_complete_iotlb(&cvq->vring, cvq->head, write); 2070 vringh_kiov_cleanup(&cvq->riov); 2071 vringh_kiov_cleanup(&cvq->wiov); 2072 2073 if (vringh_need_notify_iotlb(&cvq->vring)) 2074 vringh_notify(&cvq->vring); 2075 2076 
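/* One control command is handled per work invocation: bump the completed
 * counter, requeue the work item and exit so the next command runs in a
 * fresh invocation (reslock is released in between).
 */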
cvq->completed_desc++; 2077 queue_work(mvdev->wq, &wqent->work); 2078 break; 2079 } 2080 2081 out: 2082 up_write(&ndev->reslock); 2083 } 2084 2085 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) 2086 { 2087 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2088 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2089 struct mlx5_vdpa_virtqueue *mvq; 2090 2091 if (!is_index_valid(mvdev, idx)) 2092 return; 2093 2094 if (unlikely(is_ctrl_vq_idx(mvdev, idx))) { 2095 if (!mvdev->wq || !mvdev->cvq.ready) 2096 return; 2097 2098 queue_work(mvdev->wq, &ndev->cvq_ent.work); 2099 return; 2100 } 2101 2102 mvq = &ndev->vqs[idx]; 2103 if (unlikely(!mvq->ready)) 2104 return; 2105 2106 iowrite16(idx, ndev->mvdev.res.kick_addr); 2107 } 2108 2109 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area, 2110 u64 driver_area, u64 device_area) 2111 { 2112 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2113 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2114 struct mlx5_vdpa_virtqueue *mvq; 2115 2116 if (!is_index_valid(mvdev, idx)) 2117 return -EINVAL; 2118 2119 if (is_ctrl_vq_idx(mvdev, idx)) { 2120 mvdev->cvq.desc_addr = desc_area; 2121 mvdev->cvq.device_addr = device_area; 2122 mvdev->cvq.driver_addr = driver_area; 2123 return 0; 2124 } 2125 2126 mvq = &ndev->vqs[idx]; 2127 mvq->desc_addr = desc_area; 2128 mvq->device_addr = device_area; 2129 mvq->driver_addr = driver_area; 2130 return 0; 2131 } 2132 2133 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) 2134 { 2135 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2136 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2137 struct mlx5_vdpa_virtqueue *mvq; 2138 2139 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) 2140 return; 2141 2142 mvq = &ndev->vqs[idx]; 2143 mvq->num_ent = num; 2144 } 2145 2146 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb) 2147 { 2148 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2149 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2150 2151 ndev->event_cbs[idx] = *cb; 2152 if (is_ctrl_vq_idx(mvdev, idx)) 2153 mvdev->cvq.event_cb = *cb; 2154 } 2155 2156 static void mlx5_cvq_notify(struct vringh *vring) 2157 { 2158 struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring); 2159 2160 if (!cvq->event_cb.callback) 2161 return; 2162 2163 cvq->event_cb.callback(cvq->event_cb.private); 2164 } 2165 2166 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready) 2167 { 2168 struct mlx5_control_vq *cvq = &mvdev->cvq; 2169 2170 cvq->ready = ready; 2171 if (!ready) 2172 return; 2173 2174 cvq->vring.notify = mlx5_cvq_notify; 2175 } 2176 2177 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready) 2178 { 2179 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2180 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2181 struct mlx5_vdpa_virtqueue *mvq; 2182 int err; 2183 2184 if (!mvdev->actual_features) 2185 return; 2186 2187 if (!is_index_valid(mvdev, idx)) 2188 return; 2189 2190 if (is_ctrl_vq_idx(mvdev, idx)) { 2191 set_cvq_ready(mvdev, ready); 2192 return; 2193 } 2194 2195 mvq = &ndev->vqs[idx]; 2196 if (!ready) { 2197 suspend_vq(ndev, mvq); 2198 } else { 2199 err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); 2200 if (err) { 2201 mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err); 2202 ready = false; 2203 } 2204 } 2205 2206 2207 mvq->ready = ready; 2208 } 2209 2210 static bool 
mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx) 2211 { 2212 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2213 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2214 2215 if (!is_index_valid(mvdev, idx)) 2216 return false; 2217 2218 if (is_ctrl_vq_idx(mvdev, idx)) 2219 return mvdev->cvq.ready; 2220 2221 return ndev->vqs[idx].ready; 2222 } 2223 2224 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, 2225 const struct vdpa_vq_state *state) 2226 { 2227 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2228 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2229 struct mlx5_vdpa_virtqueue *mvq; 2230 2231 if (!is_index_valid(mvdev, idx)) 2232 return -EINVAL; 2233 2234 if (is_ctrl_vq_idx(mvdev, idx)) { 2235 mvdev->cvq.vring.last_avail_idx = state->split.avail_index; 2236 return 0; 2237 } 2238 2239 mvq = &ndev->vqs[idx]; 2240 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) { 2241 mlx5_vdpa_warn(mvdev, "can't modify available index\n"); 2242 return -EINVAL; 2243 } 2244 2245 mvq->used_idx = state->split.avail_index; 2246 mvq->avail_idx = state->split.avail_index; 2247 return 0; 2248 } 2249 2250 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state) 2251 { 2252 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2253 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2254 struct mlx5_vdpa_virtqueue *mvq; 2255 struct mlx5_virtq_attr attr; 2256 int err; 2257 2258 if (!is_index_valid(mvdev, idx)) 2259 return -EINVAL; 2260 2261 if (is_ctrl_vq_idx(mvdev, idx)) { 2262 state->split.avail_index = mvdev->cvq.vring.last_avail_idx; 2263 return 0; 2264 } 2265 2266 mvq = &ndev->vqs[idx]; 2267 /* If the virtq object was destroyed, use the value saved at 2268 * the last minute of suspend_vq. This caters for userspace 2269 * that cares about emulating the index after vq is stopped. 2270 */ 2271 if (!mvq->initialized) { 2272 /* Firmware returns a wrong value for the available index. 2273 * Since both values should be identical, we take the value of 2274 * used_idx which is reported correctly. 
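 * That value was captured by suspend_vq() just before the virtqueue
 * object was destroyed.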
2275 */ 2276 state->split.avail_index = mvq->used_idx; 2277 return 0; 2278 } 2279 2280 err = query_virtqueue(ndev, mvq, &attr); 2281 if (err) { 2282 mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n"); 2283 return err; 2284 } 2285 state->split.avail_index = attr.used_index; 2286 return 0; 2287 } 2288 2289 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev) 2290 { 2291 return PAGE_SIZE; 2292 } 2293 2294 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx) 2295 { 2296 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2297 2298 if (is_ctrl_vq_idx(mvdev, idx)) 2299 return MLX5_VDPA_CVQ_GROUP; 2300 2301 return MLX5_VDPA_DATAVQ_GROUP; 2302 } 2303 2304 static u64 mlx_to_vritio_features(u16 dev_features) 2305 { 2306 u64 result = 0; 2307 2308 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF)) 2309 result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 2310 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN)) 2311 result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN); 2312 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN)) 2313 result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN); 2314 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6)) 2315 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6); 2316 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4)) 2317 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4); 2318 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM)) 2319 result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM); 2320 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM)) 2321 result |= BIT_ULL(VIRTIO_NET_F_CSUM); 2322 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6)) 2323 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6); 2324 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4)) 2325 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4); 2326 2327 return result; 2328 } 2329 2330 static u64 get_supported_features(struct mlx5_core_dev *mdev) 2331 { 2332 u64 mlx_vdpa_features = 0; 2333 u16 dev_features; 2334 2335 dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask); 2336 mlx_vdpa_features |= mlx_to_vritio_features(dev_features); 2337 if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0)) 2338 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1); 2339 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM); 2340 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ); 2341 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); 2342 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ); 2343 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS); 2344 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU); 2345 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN); 2346 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC); 2347 2348 return mlx_vdpa_features; 2349 } 2350 2351 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev) 2352 { 2353 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2354 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2355 2356 print_features(mvdev, ndev->mvdev.mlx_features, false); 2357 return ndev->mvdev.mlx_features; 2358 } 2359 2360 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) 2361 { 2362 /* Minimum features to expect */ 2363 if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) 2364 return -EOPNOTSUPP; 2365 2366 /* Double check features combination sent down by the driver. 2367 * Fail invalid features due to absence of the depended feature. 2368 * 2369 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit 2370 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ". 
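 * A feature set carrying _F_MQ without _F_CTRL_VQ therefore violates
 * the spec.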
2371 * By failing the invalid features sent down by untrusted drivers, 2372 * we're assured the assumption made upon is_index_valid() and 2373 * is_ctrl_vq_idx() will not be compromised. 2374 */ 2375 if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) == 2376 BIT_ULL(VIRTIO_NET_F_MQ)) 2377 return -EINVAL; 2378 2379 return 0; 2380 } 2381 2382 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) 2383 { 2384 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2385 int err; 2386 int i; 2387 2388 for (i = 0; i < mvdev->max_vqs; i++) { 2389 err = setup_vq(ndev, &ndev->vqs[i]); 2390 if (err) 2391 goto err_vq; 2392 } 2393 2394 return 0; 2395 2396 err_vq: 2397 for (--i; i >= 0; i--) 2398 teardown_vq(ndev, &ndev->vqs[i]); 2399 2400 return err; 2401 } 2402 2403 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev) 2404 { 2405 struct mlx5_vdpa_virtqueue *mvq; 2406 int i; 2407 2408 for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) { 2409 mvq = &ndev->vqs[i]; 2410 if (!mvq->initialized) 2411 continue; 2412 2413 teardown_vq(ndev, mvq); 2414 } 2415 } 2416 2417 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev) 2418 { 2419 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) { 2420 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) { 2421 /* MQ supported. CVQ index is right above the last data virtqueue's */ 2422 mvdev->max_idx = mvdev->max_vqs; 2423 } else { 2424 /* Only CVQ supportted. data virtqueues occupy indices 0 and 1. 2425 * CVQ gets index 2 2426 */ 2427 mvdev->max_idx = 2; 2428 } 2429 } else { 2430 /* Two data virtqueues only: one for rx and one for tx */ 2431 mvdev->max_idx = 1; 2432 } 2433 } 2434 2435 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) 2436 { 2437 u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {}; 2438 u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {}; 2439 int err; 2440 2441 MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); 2442 MLX5_SET(query_vport_state_in, in, op_mod, opmod); 2443 MLX5_SET(query_vport_state_in, in, vport_number, vport); 2444 if (vport) 2445 MLX5_SET(query_vport_state_in, in, other_vport, 1); 2446 2447 err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out); 2448 if (err) 2449 return 0; 2450 2451 return MLX5_GET(query_vport_state_out, out, state); 2452 } 2453 2454 static bool get_link_state(struct mlx5_vdpa_dev *mvdev) 2455 { 2456 if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) == 2457 VPORT_STATE_UP) 2458 return true; 2459 2460 return false; 2461 } 2462 2463 static void update_carrier(struct work_struct *work) 2464 { 2465 struct mlx5_vdpa_wq_ent *wqent; 2466 struct mlx5_vdpa_dev *mvdev; 2467 struct mlx5_vdpa_net *ndev; 2468 2469 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 2470 mvdev = wqent->mvdev; 2471 ndev = to_mlx5_vdpa_ndev(mvdev); 2472 if (get_link_state(mvdev)) 2473 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); 2474 else 2475 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); 2476 2477 if (ndev->config_cb.callback) 2478 ndev->config_cb.callback(ndev->config_cb.private); 2479 2480 kfree(wqent); 2481 } 2482 2483 static int queue_link_work(struct mlx5_vdpa_net *ndev) 2484 { 2485 struct mlx5_vdpa_wq_ent *wqent; 2486 2487 wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); 2488 if (!wqent) 2489 return -ENOMEM; 2490 2491 wqent->mvdev = &ndev->mvdev; 2492 INIT_WORK(&wqent->work, update_carrier); 2493 queue_work(ndev->mvdev.wq, &wqent->work); 2494 return 0; 2495 } 2496 2497 static int 
event_handler(struct notifier_block *nb, unsigned long event, void *param) 2498 { 2499 struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb); 2500 struct mlx5_eqe *eqe = param; 2501 int ret = NOTIFY_DONE; 2502 2503 if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { 2504 switch (eqe->sub_type) { 2505 case MLX5_PORT_CHANGE_SUBTYPE_DOWN: 2506 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: 2507 if (queue_link_work(ndev)) 2508 return NOTIFY_DONE; 2509 2510 ret = NOTIFY_OK; 2511 break; 2512 default: 2513 return NOTIFY_DONE; 2514 } 2515 return ret; 2516 } 2517 return ret; 2518 } 2519 2520 static void register_link_notifier(struct mlx5_vdpa_net *ndev) 2521 { 2522 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS))) 2523 return; 2524 2525 ndev->nb.notifier_call = event_handler; 2526 mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb); 2527 ndev->nb_registered = true; 2528 queue_link_work(ndev); 2529 } 2530 2531 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev) 2532 { 2533 if (!ndev->nb_registered) 2534 return; 2535 2536 ndev->nb_registered = false; 2537 mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb); 2538 if (ndev->mvdev.wq) 2539 flush_workqueue(ndev->mvdev.wq); 2540 } 2541 2542 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) 2543 { 2544 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2545 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2546 int err; 2547 2548 print_features(mvdev, features, true); 2549 2550 err = verify_driver_features(mvdev, features); 2551 if (err) 2552 return err; 2553 2554 ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; 2555 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)) 2556 ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs); 2557 else 2558 ndev->rqt_size = 1; 2559 2560 /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section 2561 * 5.1.6.5.5 "Device operation in multiqueue mode": 2562 * 2563 * Multiqueue is disabled by default. 2564 * The driver enables multiqueue by sending a command using class 2565 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue 2566 * operation, as follows: ... 
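 * Hence the device starts with a single queue pair here; additional
 * pairs are only enabled later through handle_ctrl_mq() ->
 * change_num_qps().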
2567 */ 2568 ndev->cur_num_vqs = 2; 2569 2570 update_cvq_info(mvdev); 2571 return err; 2572 } 2573 2574 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb) 2575 { 2576 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2577 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2578 2579 ndev->config_cb = *cb; 2580 } 2581 2582 #define MLX5_VDPA_MAX_VQ_ENTRIES 256 2583 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev) 2584 { 2585 return MLX5_VDPA_MAX_VQ_ENTRIES; 2586 } 2587 2588 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev) 2589 { 2590 return VIRTIO_ID_NET; 2591 } 2592 2593 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev) 2594 { 2595 return PCI_VENDOR_ID_MELLANOX; 2596 } 2597 2598 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev) 2599 { 2600 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2601 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2602 2603 print_status(mvdev, ndev->mvdev.status, false); 2604 return ndev->mvdev.status; 2605 } 2606 2607 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 2608 { 2609 struct mlx5_vq_restore_info *ri = &mvq->ri; 2610 struct mlx5_virtq_attr attr = {}; 2611 int err; 2612 2613 if (mvq->initialized) { 2614 err = query_virtqueue(ndev, mvq, &attr); 2615 if (err) 2616 return err; 2617 } 2618 2619 ri->avail_index = attr.available_index; 2620 ri->used_index = attr.used_index; 2621 ri->ready = mvq->ready; 2622 ri->num_ent = mvq->num_ent; 2623 ri->desc_addr = mvq->desc_addr; 2624 ri->device_addr = mvq->device_addr; 2625 ri->driver_addr = mvq->driver_addr; 2626 ri->map = mvq->map; 2627 ri->restore = true; 2628 return 0; 2629 } 2630 2631 static int save_channels_info(struct mlx5_vdpa_net *ndev) 2632 { 2633 int i; 2634 2635 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2636 memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri)); 2637 save_channel_info(ndev, &ndev->vqs[i]); 2638 } 2639 return 0; 2640 } 2641 2642 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev) 2643 { 2644 int i; 2645 2646 for (i = 0; i < ndev->mvdev.max_vqs; i++) 2647 memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 2648 } 2649 2650 static void restore_channels_info(struct mlx5_vdpa_net *ndev) 2651 { 2652 struct mlx5_vdpa_virtqueue *mvq; 2653 struct mlx5_vq_restore_info *ri; 2654 int i; 2655 2656 mlx5_clear_vqs(ndev); 2657 init_mvqs(ndev); 2658 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2659 mvq = &ndev->vqs[i]; 2660 ri = &mvq->ri; 2661 if (!ri->restore) 2662 continue; 2663 2664 mvq->avail_idx = ri->avail_index; 2665 mvq->used_idx = ri->used_index; 2666 mvq->ready = ri->ready; 2667 mvq->num_ent = ri->num_ent; 2668 mvq->desc_addr = ri->desc_addr; 2669 mvq->device_addr = ri->device_addr; 2670 mvq->driver_addr = ri->driver_addr; 2671 mvq->map = ri->map; 2672 } 2673 } 2674 2675 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, 2676 struct vhost_iotlb *iotlb, unsigned int asid) 2677 { 2678 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2679 int err; 2680 2681 suspend_vqs(ndev); 2682 err = save_channels_info(ndev); 2683 if (err) 2684 goto err_mr; 2685 2686 teardown_driver(ndev); 2687 mlx5_vdpa_destroy_mr_asid(mvdev, asid); 2688 err = mlx5_vdpa_create_mr(mvdev, iotlb, asid); 2689 if (err) 2690 goto err_mr; 2691 2692 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended) 2693 goto err_mr; 2694 2695 restore_channels_info(ndev); 2696 err = setup_driver(mvdev); 2697 if (err) 2698 goto err_setup; 2699 2700 return 0; 2701 2702 err_setup: 
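	/* setup_driver() failed: drop the memory region that was just
	 * created for this ASID before returning the error.
	 */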
2703 mlx5_vdpa_destroy_mr_asid(mvdev, asid); 2704 err_mr: 2705 return err; 2706 } 2707 2708 /* reslock must be held for this function */ 2709 static int setup_driver(struct mlx5_vdpa_dev *mvdev) 2710 { 2711 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2712 int err; 2713 2714 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 2715 2716 if (ndev->setup) { 2717 mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n"); 2718 err = 0; 2719 goto out; 2720 } 2721 mlx5_vdpa_add_debugfs(ndev); 2722 2723 err = read_umem_params(ndev); 2724 if (err) 2725 goto err_setup; 2726 2727 err = setup_virtqueues(mvdev); 2728 if (err) { 2729 mlx5_vdpa_warn(mvdev, "setup_virtqueues\n"); 2730 goto err_setup; 2731 } 2732 2733 err = create_rqt(ndev); 2734 if (err) { 2735 mlx5_vdpa_warn(mvdev, "create_rqt\n"); 2736 goto err_rqt; 2737 } 2738 2739 err = create_tir(ndev); 2740 if (err) { 2741 mlx5_vdpa_warn(mvdev, "create_tir\n"); 2742 goto err_tir; 2743 } 2744 2745 err = setup_steering(ndev); 2746 if (err) { 2747 mlx5_vdpa_warn(mvdev, "setup_steering\n"); 2748 goto err_fwd; 2749 } 2750 ndev->setup = true; 2751 2752 return 0; 2753 2754 err_fwd: 2755 destroy_tir(ndev); 2756 err_tir: 2757 destroy_rqt(ndev); 2758 err_rqt: 2759 teardown_virtqueues(ndev); 2760 err_setup: 2761 mlx5_vdpa_remove_debugfs(ndev); 2762 out: 2763 return err; 2764 } 2765 2766 /* reslock must be held for this function */ 2767 static void teardown_driver(struct mlx5_vdpa_net *ndev) 2768 { 2769 2770 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 2771 2772 if (!ndev->setup) 2773 return; 2774 2775 mlx5_vdpa_remove_debugfs(ndev); 2776 teardown_steering(ndev); 2777 destroy_tir(ndev); 2778 destroy_rqt(ndev); 2779 teardown_virtqueues(ndev); 2780 ndev->setup = false; 2781 } 2782 2783 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) 2784 { 2785 int i; 2786 2787 for (i = 0; i < ndev->mvdev.max_vqs; i++) 2788 ndev->vqs[i].ready = false; 2789 2790 ndev->mvdev.cvq.ready = false; 2791 } 2792 2793 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) 2794 { 2795 struct mlx5_control_vq *cvq = &mvdev->cvq; 2796 int err = 0; 2797 2798 if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) 2799 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, 2800 MLX5_CVQ_MAX_ENT, false, 2801 (struct vring_desc *)(uintptr_t)cvq->desc_addr, 2802 (struct vring_avail *)(uintptr_t)cvq->driver_addr, 2803 (struct vring_used *)(uintptr_t)cvq->device_addr); 2804 2805 return err; 2806 } 2807 2808 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) 2809 { 2810 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2811 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2812 int err; 2813 2814 print_status(mvdev, status, true); 2815 2816 down_write(&ndev->reslock); 2817 2818 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { 2819 if (status & VIRTIO_CONFIG_S_DRIVER_OK) { 2820 err = setup_cvq_vring(mvdev); 2821 if (err) { 2822 mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n"); 2823 goto err_setup; 2824 } 2825 register_link_notifier(ndev); 2826 err = setup_driver(mvdev); 2827 if (err) { 2828 mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); 2829 goto err_driver; 2830 } 2831 } else { 2832 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); 2833 goto err_clear; 2834 } 2835 } 2836 2837 ndev->mvdev.status = status; 2838 up_write(&ndev->reslock); 2839 return; 2840 2841 err_driver: 2842 unregister_link_notifier(ndev); 2843 err_setup: 2844 mlx5_vdpa_destroy_mr(&ndev->mvdev); 2845 ndev->mvdev.status |= 
VIRTIO_CONFIG_S_FAILED; 2846 err_clear: 2847 up_write(&ndev->reslock); 2848 } 2849 2850 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev) 2851 { 2852 int i; 2853 2854 /* default mapping all groups are mapped to asid 0 */ 2855 for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++) 2856 mvdev->group2asid[i] = 0; 2857 } 2858 2859 static int mlx5_vdpa_reset(struct vdpa_device *vdev) 2860 { 2861 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2862 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2863 2864 print_status(mvdev, 0, true); 2865 mlx5_vdpa_info(mvdev, "performing device reset\n"); 2866 2867 down_write(&ndev->reslock); 2868 unregister_link_notifier(ndev); 2869 teardown_driver(ndev); 2870 clear_vqs_ready(ndev); 2871 mlx5_vdpa_destroy_mr(&ndev->mvdev); 2872 ndev->mvdev.status = 0; 2873 ndev->mvdev.suspended = false; 2874 ndev->cur_num_vqs = 0; 2875 ndev->mvdev.cvq.received_desc = 0; 2876 ndev->mvdev.cvq.completed_desc = 0; 2877 memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1)); 2878 ndev->mvdev.actual_features = 0; 2879 init_group_to_asid_map(mvdev); 2880 ++mvdev->generation; 2881 2882 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 2883 if (mlx5_vdpa_create_mr(mvdev, NULL, 0)) 2884 mlx5_vdpa_warn(mvdev, "create MR failed\n"); 2885 } 2886 up_write(&ndev->reslock); 2887 2888 return 0; 2889 } 2890 2891 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev) 2892 { 2893 return sizeof(struct virtio_net_config); 2894 } 2895 2896 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, 2897 unsigned int len) 2898 { 2899 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2900 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2901 2902 if (offset + len <= sizeof(struct virtio_net_config)) 2903 memcpy(buf, (u8 *)&ndev->config + offset, len); 2904 } 2905 2906 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, 2907 unsigned int len) 2908 { 2909 /* not supported */ 2910 } 2911 2912 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) 2913 { 2914 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2915 2916 return mvdev->generation; 2917 } 2918 2919 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, 2920 unsigned int asid) 2921 { 2922 bool change_map; 2923 int err; 2924 2925 err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid); 2926 if (err) { 2927 mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err); 2928 return err; 2929 } 2930 2931 if (change_map) 2932 err = mlx5_vdpa_change_map(mvdev, iotlb, asid); 2933 2934 return err; 2935 } 2936 2937 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, 2938 struct vhost_iotlb *iotlb) 2939 { 2940 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2941 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2942 int err = -EINVAL; 2943 2944 down_write(&ndev->reslock); 2945 err = set_map_data(mvdev, iotlb, asid); 2946 up_write(&ndev->reslock); 2947 return err; 2948 } 2949 2950 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx) 2951 { 2952 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2953 2954 if (is_ctrl_vq_idx(mvdev, idx)) 2955 return &vdev->dev; 2956 2957 return mvdev->vdev.dma_dev; 2958 } 2959 2960 static void free_irqs(struct mlx5_vdpa_net *ndev) 2961 { 2962 struct mlx5_vdpa_irq_pool_entry *ent; 2963 int i; 2964 2965 if (!msix_mode_supported(&ndev->mvdev)) 2966 return; 2967 2968 if (!ndev->irqp.entries) 2969 return; 2970 2971 for (i = ndev->irqp.num_ent - 1; 
i >= 0; i--) { 2972 ent = ndev->irqp.entries + i; 2973 if (ent->map.virq) 2974 pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map); 2975 } 2976 kfree(ndev->irqp.entries); 2977 } 2978 2979 static void mlx5_vdpa_free(struct vdpa_device *vdev) 2980 { 2981 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2982 struct mlx5_core_dev *pfmdev; 2983 struct mlx5_vdpa_net *ndev; 2984 2985 ndev = to_mlx5_vdpa_ndev(mvdev); 2986 2987 free_resources(ndev); 2988 mlx5_vdpa_destroy_mr(mvdev); 2989 if (!is_zero_ether_addr(ndev->config.mac)) { 2990 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); 2991 mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); 2992 } 2993 mlx5_vdpa_free_resources(&ndev->mvdev); 2994 free_irqs(ndev); 2995 kfree(ndev->event_cbs); 2996 kfree(ndev->vqs); 2997 } 2998 2999 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx) 3000 { 3001 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3002 struct vdpa_notification_area ret = {}; 3003 struct mlx5_vdpa_net *ndev; 3004 phys_addr_t addr; 3005 3006 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) 3007 return ret; 3008 3009 /* If SF BAR size is smaller than PAGE_SIZE, do not use direct 3010 * notification to avoid the risk of mapping pages that contain BAR of more 3011 * than one SF 3012 */ 3013 if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT) 3014 return ret; 3015 3016 ndev = to_mlx5_vdpa_ndev(mvdev); 3017 addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr; 3018 ret.addr = addr; 3019 ret.size = PAGE_SIZE; 3020 return ret; 3021 } 3022 3023 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx) 3024 { 3025 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3026 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3027 struct mlx5_vdpa_virtqueue *mvq; 3028 3029 if (!is_index_valid(mvdev, idx)) 3030 return -EINVAL; 3031 3032 if (is_ctrl_vq_idx(mvdev, idx)) 3033 return -EOPNOTSUPP; 3034 3035 mvq = &ndev->vqs[idx]; 3036 if (!mvq->map.virq) 3037 return -EOPNOTSUPP; 3038 3039 return mvq->map.virq; 3040 } 3041 3042 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev) 3043 { 3044 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3045 3046 return mvdev->actual_features; 3047 } 3048 3049 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 3050 u64 *received_desc, u64 *completed_desc) 3051 { 3052 u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {}; 3053 u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {}; 3054 void *cmd_hdr; 3055 void *ctx; 3056 int err; 3057 3058 if (!counters_supported(&ndev->mvdev)) 3059 return -EOPNOTSUPP; 3060 3061 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 3062 return -EAGAIN; 3063 3064 cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr); 3065 3066 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 3067 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 3068 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 3069 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id); 3070 3071 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 3072 if (err) 3073 return err; 3074 3075 ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters); 3076 *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc); 3077 *completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc); 3078 return 0; 3079 } 3080 3081 static int mlx5_vdpa_get_vendor_vq_stats(struct 
vdpa_device *vdev, u16 idx, 3082 struct sk_buff *msg, 3083 struct netlink_ext_ack *extack) 3084 { 3085 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3086 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3087 struct mlx5_vdpa_virtqueue *mvq; 3088 struct mlx5_control_vq *cvq; 3089 u64 received_desc; 3090 u64 completed_desc; 3091 int err = 0; 3092 3093 down_read(&ndev->reslock); 3094 if (!is_index_valid(mvdev, idx)) { 3095 NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid"); 3096 err = -EINVAL; 3097 goto out_err; 3098 } 3099 3100 if (idx == ctrl_vq_idx(mvdev)) { 3101 cvq = &mvdev->cvq; 3102 received_desc = cvq->received_desc; 3103 completed_desc = cvq->completed_desc; 3104 goto out; 3105 } 3106 3107 mvq = &ndev->vqs[idx]; 3108 err = counter_set_query(ndev, mvq, &received_desc, &completed_desc); 3109 if (err) { 3110 NL_SET_ERR_MSG_MOD(extack, "failed to query hardware"); 3111 goto out_err; 3112 } 3113 3114 out: 3115 err = -EMSGSIZE; 3116 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc")) 3117 goto out_err; 3118 3119 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc, 3120 VDPA_ATTR_PAD)) 3121 goto out_err; 3122 3123 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc")) 3124 goto out_err; 3125 3126 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc, 3127 VDPA_ATTR_PAD)) 3128 goto out_err; 3129 3130 err = 0; 3131 out_err: 3132 up_read(&ndev->reslock); 3133 return err; 3134 } 3135 3136 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev) 3137 { 3138 struct mlx5_control_vq *cvq; 3139 3140 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 3141 return; 3142 3143 cvq = &mvdev->cvq; 3144 cvq->ready = false; 3145 } 3146 3147 static int mlx5_vdpa_suspend(struct vdpa_device *vdev) 3148 { 3149 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3150 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3151 struct mlx5_vdpa_virtqueue *mvq; 3152 int i; 3153 3154 mlx5_vdpa_info(mvdev, "suspending device\n"); 3155 3156 down_write(&ndev->reslock); 3157 unregister_link_notifier(ndev); 3158 for (i = 0; i < ndev->cur_num_vqs; i++) { 3159 mvq = &ndev->vqs[i]; 3160 suspend_vq(ndev, mvq); 3161 } 3162 mlx5_vdpa_cvq_suspend(mvdev); 3163 mvdev->suspended = true; 3164 up_write(&ndev->reslock); 3165 return 0; 3166 } 3167 3168 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, 3169 unsigned int asid) 3170 { 3171 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3172 3173 if (group >= MLX5_VDPA_NUMVQ_GROUPS) 3174 return -EINVAL; 3175 3176 mvdev->group2asid[group] = asid; 3177 return 0; 3178 } 3179 3180 static const struct vdpa_config_ops mlx5_vdpa_ops = { 3181 .set_vq_address = mlx5_vdpa_set_vq_address, 3182 .set_vq_num = mlx5_vdpa_set_vq_num, 3183 .kick_vq = mlx5_vdpa_kick_vq, 3184 .set_vq_cb = mlx5_vdpa_set_vq_cb, 3185 .set_vq_ready = mlx5_vdpa_set_vq_ready, 3186 .get_vq_ready = mlx5_vdpa_get_vq_ready, 3187 .set_vq_state = mlx5_vdpa_set_vq_state, 3188 .get_vq_state = mlx5_vdpa_get_vq_state, 3189 .get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats, 3190 .get_vq_notification = mlx5_get_vq_notification, 3191 .get_vq_irq = mlx5_get_vq_irq, 3192 .get_vq_align = mlx5_vdpa_get_vq_align, 3193 .get_vq_group = mlx5_vdpa_get_vq_group, 3194 .get_device_features = mlx5_vdpa_get_device_features, 3195 .set_driver_features = mlx5_vdpa_set_driver_features, 3196 .get_driver_features = mlx5_vdpa_get_driver_features, 3197 .set_config_cb = mlx5_vdpa_set_config_cb, 3198 .get_vq_num_max = 
mlx5_vdpa_get_vq_num_max, 3199 .get_device_id = mlx5_vdpa_get_device_id, 3200 .get_vendor_id = mlx5_vdpa_get_vendor_id, 3201 .get_status = mlx5_vdpa_get_status, 3202 .set_status = mlx5_vdpa_set_status, 3203 .reset = mlx5_vdpa_reset, 3204 .get_config_size = mlx5_vdpa_get_config_size, 3205 .get_config = mlx5_vdpa_get_config, 3206 .set_config = mlx5_vdpa_set_config, 3207 .get_generation = mlx5_vdpa_get_generation, 3208 .set_map = mlx5_vdpa_set_map, 3209 .set_group_asid = mlx5_set_group_asid, 3210 .get_vq_dma_dev = mlx5_get_vq_dma_dev, 3211 .free = mlx5_vdpa_free, 3212 .suspend = mlx5_vdpa_suspend, 3213 }; 3214 3215 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) 3216 { 3217 u16 hw_mtu; 3218 int err; 3219 3220 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); 3221 if (err) 3222 return err; 3223 3224 *mtu = hw_mtu - MLX5V_ETH_HARD_MTU; 3225 return 0; 3226 } 3227 3228 static int alloc_resources(struct mlx5_vdpa_net *ndev) 3229 { 3230 struct mlx5_vdpa_net_resources *res = &ndev->res; 3231 int err; 3232 3233 if (res->valid) { 3234 mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n"); 3235 return -EEXIST; 3236 } 3237 3238 err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn); 3239 if (err) 3240 return err; 3241 3242 err = create_tis(ndev); 3243 if (err) 3244 goto err_tis; 3245 3246 res->valid = true; 3247 3248 return 0; 3249 3250 err_tis: 3251 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 3252 return err; 3253 } 3254 3255 static void free_resources(struct mlx5_vdpa_net *ndev) 3256 { 3257 struct mlx5_vdpa_net_resources *res = &ndev->res; 3258 3259 if (!res->valid) 3260 return; 3261 3262 destroy_tis(ndev); 3263 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 3264 res->valid = false; 3265 } 3266 3267 static void init_mvqs(struct mlx5_vdpa_net *ndev) 3268 { 3269 struct mlx5_vdpa_virtqueue *mvq; 3270 int i; 3271 3272 for (i = 0; i < ndev->mvdev.max_vqs; ++i) { 3273 mvq = &ndev->vqs[i]; 3274 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 3275 mvq->index = i; 3276 mvq->ndev = ndev; 3277 mvq->fwqp.fw = true; 3278 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; 3279 } 3280 for (; i < ndev->mvdev.max_vqs; i++) { 3281 mvq = &ndev->vqs[i]; 3282 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 3283 mvq->index = i; 3284 mvq->ndev = ndev; 3285 } 3286 } 3287 3288 struct mlx5_vdpa_mgmtdev { 3289 struct vdpa_mgmt_dev mgtdev; 3290 struct mlx5_adev *madev; 3291 struct mlx5_vdpa_net *ndev; 3292 }; 3293 3294 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu) 3295 { 3296 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); 3297 void *in; 3298 int err; 3299 3300 in = kvzalloc(inlen, GFP_KERNEL); 3301 if (!in) 3302 return -ENOMEM; 3303 3304 MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1); 3305 MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, 3306 mtu + MLX5V_ETH_HARD_MTU); 3307 MLX5_SET(modify_nic_vport_context_in, in, opcode, 3308 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); 3309 3310 err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); 3311 3312 kvfree(in); 3313 return err; 3314 } 3315 3316 static void allocate_irqs(struct mlx5_vdpa_net *ndev) 3317 { 3318 struct mlx5_vdpa_irq_pool_entry *ent; 3319 int i; 3320 3321 if (!msix_mode_supported(&ndev->mvdev)) 3322 return; 3323 3324 if (!ndev->mvdev.mdev->pdev) 3325 return; 3326 3327 ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL); 3328 if (!ndev->irqp.entries) 3329 return; 3330 3331 3332 for (i = 0; i < 
ndev->mvdev.max_vqs; i++) { 3333 ent = ndev->irqp.entries + i; 3334 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", 3335 dev_name(&ndev->mvdev.vdev.dev), i); 3336 ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL); 3337 if (!ent->map.virq) 3338 return; 3339 3340 ndev->irqp.num_ent++; 3341 } 3342 } 3343 3344 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, 3345 const struct vdpa_dev_set_config *add_config) 3346 { 3347 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); 3348 struct virtio_net_config *config; 3349 struct mlx5_core_dev *pfmdev; 3350 struct mlx5_vdpa_dev *mvdev; 3351 struct mlx5_vdpa_net *ndev; 3352 struct mlx5_core_dev *mdev; 3353 u64 device_features; 3354 u32 max_vqs; 3355 u16 mtu; 3356 int err; 3357 3358 if (mgtdev->ndev) 3359 return -ENOSPC; 3360 3361 mdev = mgtdev->madev->mdev; 3362 device_features = mgtdev->mgtdev.supported_features; 3363 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { 3364 if (add_config->device_features & ~device_features) { 3365 dev_warn(mdev->device, 3366 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n", 3367 add_config->device_features, device_features); 3368 return -EINVAL; 3369 } 3370 device_features &= add_config->device_features; 3371 } else { 3372 device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 3373 } 3374 if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) && 3375 device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) { 3376 dev_warn(mdev->device, 3377 "Must provision minimum features 0x%llx for this device", 3378 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)); 3379 return -EOPNOTSUPP; 3380 } 3381 3382 if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) & 3383 MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) { 3384 dev_warn(mdev->device, "missing support for split virtqueues\n"); 3385 return -EOPNOTSUPP; 3386 } 3387 3388 max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues), 3389 1 << MLX5_CAP_GEN(mdev, log_max_rqt_size)); 3390 if (max_vqs < 2) { 3391 dev_warn(mdev->device, 3392 "%d virtqueues are supported. 
At least 2 are required\n", 3393 max_vqs); 3394 return -EAGAIN; 3395 } 3396 3397 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) { 3398 if (add_config->net.max_vq_pairs > max_vqs / 2) 3399 return -EINVAL; 3400 max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs); 3401 } else { 3402 max_vqs = 2; 3403 } 3404 3405 ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, 3406 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false); 3407 if (IS_ERR(ndev)) 3408 return PTR_ERR(ndev); 3409 3410 ndev->mvdev.max_vqs = max_vqs; 3411 mvdev = &ndev->mvdev; 3412 mvdev->mdev = mdev; 3413 3414 ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL); 3415 ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL); 3416 if (!ndev->vqs || !ndev->event_cbs) { 3417 err = -ENOMEM; 3418 goto err_alloc; 3419 } 3420 3421 init_mvqs(ndev); 3422 allocate_irqs(ndev); 3423 init_rwsem(&ndev->reslock); 3424 config = &ndev->config; 3425 3426 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) { 3427 err = config_func_mtu(mdev, add_config->net.mtu); 3428 if (err) 3429 goto err_alloc; 3430 } 3431 3432 if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) { 3433 err = query_mtu(mdev, &mtu); 3434 if (err) 3435 goto err_alloc; 3436 3437 ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu); 3438 } 3439 3440 if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) { 3441 if (get_link_state(mvdev)) 3442 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); 3443 else 3444 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); 3445 } 3446 3447 if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) { 3448 memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN); 3449 /* No bother setting mac address in config if not going to provision _F_MAC */ 3450 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 || 3451 device_features & BIT_ULL(VIRTIO_NET_F_MAC)) { 3452 err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); 3453 if (err) 3454 goto err_alloc; 3455 } 3456 3457 if (!is_zero_ether_addr(config->mac)) { 3458 pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); 3459 err = mlx5_mpfs_add_mac(pfmdev, config->mac); 3460 if (err) 3461 goto err_alloc; 3462 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) { 3463 /* 3464 * We used to clear _F_MAC feature bit if seeing 3465 * zero mac address when device features are not 3466 * specifically provisioned. Keep the behaviour 3467 * so old scripts do not break. 
3468 */ 3469 device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC); 3470 } else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) { 3471 /* Don't provision zero mac address for _F_MAC */ 3472 mlx5_vdpa_warn(&ndev->mvdev, 3473 "No mac address provisioned?\n"); 3474 err = -EINVAL; 3475 goto err_alloc; 3476 } 3477 3478 if (device_features & BIT_ULL(VIRTIO_NET_F_MQ)) 3479 config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2); 3480 3481 ndev->mvdev.mlx_features = device_features; 3482 mvdev->vdev.dma_dev = &mdev->pdev->dev; 3483 err = mlx5_vdpa_alloc_resources(&ndev->mvdev); 3484 if (err) 3485 goto err_mpfs; 3486 3487 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 3488 err = mlx5_vdpa_create_mr(mvdev, NULL, 0); 3489 if (err) 3490 goto err_res; 3491 } 3492 3493 err = alloc_resources(ndev); 3494 if (err) 3495 goto err_mr; 3496 3497 ndev->cvq_ent.mvdev = mvdev; 3498 INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler); 3499 mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq"); 3500 if (!mvdev->wq) { 3501 err = -ENOMEM; 3502 goto err_res2; 3503 } 3504 3505 mvdev->vdev.mdev = &mgtdev->mgtdev; 3506 err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1); 3507 if (err) 3508 goto err_reg; 3509 3510 mgtdev->ndev = ndev; 3511 return 0; 3512 3513 err_reg: 3514 destroy_workqueue(mvdev->wq); 3515 err_res2: 3516 free_resources(ndev); 3517 err_mr: 3518 mlx5_vdpa_destroy_mr(mvdev); 3519 err_res: 3520 mlx5_vdpa_free_resources(&ndev->mvdev); 3521 err_mpfs: 3522 if (!is_zero_ether_addr(config->mac)) 3523 mlx5_mpfs_del_mac(pfmdev, config->mac); 3524 err_alloc: 3525 put_device(&mvdev->vdev.dev); 3526 return err; 3527 } 3528 3529 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev) 3530 { 3531 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); 3532 struct mlx5_vdpa_dev *mvdev = to_mvdev(dev); 3533 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3534 struct workqueue_struct *wq; 3535 3536 unregister_link_notifier(ndev); 3537 _vdpa_unregister_device(dev); 3538 wq = mvdev->wq; 3539 mvdev->wq = NULL; 3540 destroy_workqueue(wq); 3541 mgtdev->ndev = NULL; 3542 } 3543 3544 static const struct vdpa_mgmtdev_ops mdev_ops = { 3545 .dev_add = mlx5_vdpa_dev_add, 3546 .dev_del = mlx5_vdpa_dev_del, 3547 }; 3548 3549 static struct virtio_device_id id_table[] = { 3550 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 3551 { 0 }, 3552 }; 3553 3554 static int mlx5v_probe(struct auxiliary_device *adev, 3555 const struct auxiliary_device_id *id) 3556 3557 { 3558 struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev); 3559 struct mlx5_core_dev *mdev = madev->mdev; 3560 struct mlx5_vdpa_mgmtdev *mgtdev; 3561 int err; 3562 3563 mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL); 3564 if (!mgtdev) 3565 return -ENOMEM; 3566 3567 mgtdev->mgtdev.ops = &mdev_ops; 3568 mgtdev->mgtdev.device = mdev->device; 3569 mgtdev->mgtdev.id_table = id_table; 3570 mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | 3571 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) | 3572 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) | 3573 BIT_ULL(VDPA_ATTR_DEV_FEATURES); 3574 mgtdev->mgtdev.max_supported_vqs = 3575 MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1; 3576 mgtdev->mgtdev.supported_features = get_supported_features(mdev); 3577 mgtdev->madev = madev; 3578 3579 err = vdpa_mgmtdev_register(&mgtdev->mgtdev); 3580 if (err) 3581 goto reg_err; 3582 3583 auxiliary_set_drvdata(adev, mgtdev); 3584 3585 return 0; 3586 3587 reg_err: 3588 kfree(mgtdev); 3589 return err; 3590 
} 3591 3592 static void mlx5v_remove(struct auxiliary_device *adev) 3593 { 3594 struct mlx5_vdpa_mgmtdev *mgtdev; 3595 3596 mgtdev = auxiliary_get_drvdata(adev); 3597 vdpa_mgmtdev_unregister(&mgtdev->mgtdev); 3598 kfree(mgtdev); 3599 } 3600 3601 static const struct auxiliary_device_id mlx5v_id_table[] = { 3602 { .name = MLX5_ADEV_NAME ".vnet", }, 3603 {}, 3604 }; 3605 3606 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table); 3607 3608 static struct auxiliary_driver mlx5v_driver = { 3609 .name = "vnet", 3610 .probe = mlx5v_probe, 3611 .remove = mlx5v_remove, 3612 .id_table = mlx5v_id_table, 3613 }; 3614 3615 module_auxiliary_driver(mlx5v_driver); 3616