// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/module.h>
#include <linux/vdpa.h>
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/vdpa.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"
#include "mlx5_vnet.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");

#define VALID_FEATURES_MASK \
	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

#define VALID_STATUS_MASK \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)

#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))

#define MLX5V_UNTAGGED 0x1000

struct mlx5_vdpa_cq_buf {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int cqe_size;
	int nent;
};

struct mlx5_vdpa_cq {
	struct mlx5_core_cq mcq;
	struct mlx5_vdpa_cq_buf buf;
	struct mlx5_db db;
	int cqe;
};

struct mlx5_vdpa_umem {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int size;
	u32 id;
};

struct mlx5_vdpa_qp {
	struct mlx5_core_qp mqp;
	struct mlx5_frag_buf frag_buf;
	struct mlx5_db db;
	u16 head;
	bool fw;
};

struct mlx5_vq_restore_info {
	u32 num_ent;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u16 avail_index;
	u16 used_index;
	struct msi_map map;
	bool ready;
	bool restore;
};

struct mlx5_vdpa_virtqueue {
	bool ready;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u32 num_ent;

	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	u32 counter_set_id;
	bool initialized;
	int index;
	u32 virtq_id;
	struct mlx5_vdpa_net *ndev;
	u16 avail_idx;
	u16 used_idx;
	int fw_state;
	struct msi_map map;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};
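/* A short summary of the notification path implied by the fields above: the
 * device signals virtqueue activity by sending on fwqp, which is RC-connected
 * to vqqp; the resulting completions land on cq, and the CQ completion
 * handler forwards the event to the vdpa callback registered for this
 * virtqueue (see mlx5_vdpa_cq_comp() further down).
 */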
static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
			return idx < 2;
		else
			return idx < 3;
	}

	return idx <= mvdev->max_idx;
}

static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);

static bool mlx5_vdpa_debug;

#define MLX5_CVQ_MAX_ENT 16

#define MLX5_LOG_VIO_FLAG(_feature) \
	do { \
		if (features & BIT_ULL(_feature)) \
			mlx5_vdpa_info(mvdev, "%s\n", #_feature); \
	} while (0)

#define MLX5_LOG_VIO_STAT(_status) \
	do { \
		if (status & (_status)) \
			mlx5_vdpa_info(mvdev, "%s\n", #_status); \
	} while (0)

/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
	return virtio_legacy_is_little_endian() ||
		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}

static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
		return 2;

	return mvdev->max_vqs;
}

static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	return idx == ctrl_vq_idx(mvdev);
}
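/* Rough index layout implied by the checks above: without VIRTIO_NET_F_MQ the
 * device exposes data virtqueues 0 and 1, with index 2 acting as the control
 * VQ when VIRTIO_NET_F_CTRL_VQ is negotiated; with MQ, data virtqueues occupy
 * indices 0..max_vqs - 1 and the control VQ sits at index max_vqs.
 */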
"sets" : "reads"); 224 if (!features) 225 mlx5_vdpa_info(mvdev, "all feature bits are cleared\n"); 226 227 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM); 228 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM); 229 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 230 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU); 231 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC); 232 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4); 233 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6); 234 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN); 235 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO); 236 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4); 237 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6); 238 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN); 239 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO); 240 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF); 241 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS); 242 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ); 243 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX); 244 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN); 245 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA); 246 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE); 247 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ); 248 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR); 249 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT); 250 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS); 251 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT); 252 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY); 253 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX); 254 MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY); 255 MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT); 256 MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1); 257 MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM); 258 MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED); 259 MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM); 260 MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV); 261 } 262 263 static int create_tis(struct mlx5_vdpa_net *ndev) 264 { 265 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 266 u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; 267 void *tisc; 268 int err; 269 270 tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); 271 MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn); 272 err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn); 273 if (err) 274 mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err); 275 276 return err; 277 } 278 279 static void destroy_tis(struct mlx5_vdpa_net *ndev) 280 { 281 mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn); 282 } 283 284 #define MLX5_VDPA_CQE_SIZE 64 285 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE) 286 287 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent) 288 { 289 struct mlx5_frag_buf *frag_buf = &buf->frag_buf; 290 u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE; 291 u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE; 292 int err; 293 294 err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf, 295 ndev->mvdev.mdev->priv.numa_node); 296 if (err) 297 return err; 298 299 mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); 300 301 buf->cqe_size = MLX5_VDPA_CQE_SIZE; 302 buf->nent = nent; 303 304 return 0; 305 } 306 307 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size) 308 { 309 struct mlx5_frag_buf *frag_buf = &umem->frag_buf; 310 311 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf, 312 ndev->mvdev.mdev->priv.numa_node); 313 } 314 315 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf) 316 { 317 mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf); 318 } 319 320 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n) 321 { 322 return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n); 323 } 324 325 static 
static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
{
	struct mlx5_cqe64 *cqe64;
	void *cqe;
	int i;

	for (i = 0; i < buf->nent; i++) {
		cqe = get_cqe(vcq, i);
		cqe64 = cqe;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
		return cqe64;

	return NULL;
}

static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
	vqp->head += n;
	vqp->db.db[0] = cpu_to_be32(vqp->head);
}

static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
{
	struct mlx5_vdpa_qp *vqp;
	__be64 *pas;
	void *qpc;

	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	if (vqp->fw) {
		/* The firmware QP is allocated by the driver for the firmware's
		 * use, so we can skip some of the parameters as they will be
		 * chosen by the firmware.
		 */
		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
		MLX5_SET(qpc, qpc, no_sq, 1);
		return;
	}

	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, no_sq, 1);
	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
}

static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
{
	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}

static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
		     struct mlx5_vdpa_qp *vqp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	void *qpc;
	void *in;
	int err;

	if (!vqp->fw) {
		vqp = &mvq->vqqp;
		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
		if (err)
			return err;

		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
		if (err)
			goto err_db;
		inlen += vqp->frag_buf.npages * sizeof(__be64);
	}

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_kzalloc;
	}

	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	if (!vqp->fw)
		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (err)
		goto err_kzalloc;

	vqp->mqp.uid = ndev->mvdev.res.uid;
	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

	if (!vqp->fw)
		rx_post(vqp, mvq->num_ent);

	return 0;

err_kzalloc:
	if (!vqp->fw)
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
	if (!vqp->fw)
		rq_buf_free(ndev, vqp);

	return err;
}
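/* Note: qp_create() above serves both ends of the RC pair. For the firmware
 * QP (vqp->fw set) no receive buffer or doorbell is allocated here, since the
 * firmware owns those resources; for the driver QP the RQ buffer and doorbell
 * are allocated and the whole ring is posted up front via rx_post(), so the
 * receive queue starts with credit for every outstanding notification.
 */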
static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
	if (!vqp->fw) {
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
		rq_buf_free(ndev, vqp);
	}
}

static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	struct mlx5_cqe64 *cqe64;

	cqe64 = next_cqe_sw(vcq);
	if (!cqe64)
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	struct vdpa_callback *event_cb;

	event_cb = &ndev->event_cbs[mvq->index];
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* Make sure the CQ consumer update is visible to the hardware before
	 * updating the RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (event_cb->callback)
		event_cb->callback(event_cb->private);
}

static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record. We also let the vdpa
			 * core know about this so it passes it on to the
			 * virtio driver in the guest.
			 */
			mlx5_vdpa_handle_completions(mvq, num);
			num = 0;
		}
	}

	if (num)
		mlx5_vdpa_handle_completions(mvq, num);

	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}
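/* The num_ent / 2 threshold above is a batching heuristic: once more than
 * half a ring's worth of completions has been consumed in a single polling
 * pass, they are acknowledged and the guest is notified immediately, so a
 * steady stream of completions cannot delay the doorbell/callback update
 * until the loop finally drains.
 */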
static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vdpa_cq *vcq = &mvq->cq;
	__be64 *pas;
	int inlen;
	void *cqc;
	void *in;
	int err;
	int eqn;

	err = mlx5_db_alloc(mdev, &vcq->db);
	if (err)
		return err;

	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	vcq->mcq.cqe_sz = 64;

	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
	if (err)
		goto err_db;

	cq_frag_buf_init(vcq, &vcq->buf);

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_vzalloc;
	}

	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	/* Use vector 0 by default. Consider adding code to choose least used
	 * vector.
	 */
	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
	if (err)
		goto err_vec;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);

	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_vec;

	vcq->mcq.comp = mlx5_vdpa_cq_comp;
	vcq->cqe = num_ent;
	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
	kfree(in);
	return 0;

err_vec:
	kfree(in);
err_vzalloc:
	cq_frag_buf_free(ndev, &vcq->buf);
err_db:
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
	return err;
}

static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	struct mlx5_vdpa_cq *vcq = &mvq->cq;

	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
		return;
	}
	cq_frag_buf_free(ndev, &vcq->buf);
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}

static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
			  struct mlx5_vdpa_umem **umemp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int p_a;
	int p_b;

	switch (num) {
	case 1:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
		*umemp = &mvq->umem1;
		break;
	case 2:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
		*umemp = &mvq->umem2;
		break;
	case 3:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
		*umemp = &mvq->umem3;
		break;
	}
	(*umemp)->size = p_a * mvq->num_ent + p_b;
}
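/* The umem size is a linear function of the ring size, with per-umem
 * coefficients reported by the device: size = param_a * num_ent + param_b.
 * As a worked example with hypothetical capability values, param_a = 128 and
 * param_b = 4096 on a 256-entry ring would require a 36864-byte (36 KB) umem.
 */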
static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
}

static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	int inlen;
	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
	void *um;
	void *in;
	int err;
	__be64 *pas;
	struct mlx5_vdpa_umem *umem;

	set_umem_size(ndev, mvq, num, &umem);
	err = umem_frag_buf_alloc(ndev, umem, umem->size);
	if (err)
		return err;

	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
	um = MLX5_ADDR_OF(create_umem_in, in, umem);
	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);

	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
		goto err_cmd;
	}

	kfree(in);
	umem->id = MLX5_GET(create_umem_out, out, umem_id);

	return 0;

err_cmd:
	kfree(in);
err_in:
	umem_frag_buf_free(ndev, umem);
	return err;
}

static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
	struct mlx5_vdpa_umem *umem;

	switch (num) {
	case 1:
		umem = &mvq->umem1;
		break;
	case 2:
		umem = &mvq->umem2;
		break;
	case 3:
		umem = &mvq->umem3;
		break;
	}

	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
		return;

	umem_frag_buf_free(ndev, umem);
}

static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;
	int err;

	for (num = 1; num <= 3; num++) {
		err = create_umem(ndev, mvq, num);
		if (err)
			goto err_umem;
	}
	return 0;

err_umem:
	for (num--; num > 0; num--)
		umem_destroy(ndev, mvq, num);

	return err;
}

static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;

	for (num = 3; num > 0; num--)
		umem_destroy(ndev, mvq, num);
}

static int get_queue_type(struct mlx5_vdpa_net *ndev)
{
	u32 type_mask;

	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);

	/* prefer split queue */
	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;

	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));

	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
}
static bool vq_is_tx(u16 idx)
{
	return idx % 2;
}

enum {
	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
	MLX5_VIRTIO_NET_F_CSUM = 10,
	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
};

static u16 get_features(u64 features)
{
	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
}

static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
{
	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
}

static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
{
	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
		(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
		pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
}
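/* get_features() translates negotiated virtio feature bits into the device's
 * own queue-feature bit positions (the enum above). As a worked example: a
 * features word with only VIRTIO_NET_F_CSUM and VIRTIO_NET_F_HOST_TSO4 set
 * yields (1 << 10) | (1 << 12) = 0x1400, which is then split across the
 * queue_feature_bit_mask_12_3 and queue_feature_bit_mask_2_0 fields when the
 * virtqueue object is created below.
 */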
static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
	void *obj_context;
	u16 mlx_features;
	void *cmd_hdr;
	void *vq_ctx;
	void *in;
	int err;

	err = umems_create(ndev, mvq);
	if (err)
		return err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_alloc;
	}

	mlx_features = get_features(ndev->mvdev.actual_features);
	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
		 mlx_features >> 3);
	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
		 mlx_features & 7);
	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));

	if (vq_is_tx(mvq->index))
		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);

	if (mvq->map.virq) {
		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
	} else {
		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
	}

	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
	if (counters_supported(&ndev->mvdev))
		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err)
		goto err_cmd;

	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
	kfree(in);
	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	return 0;

err_cmd:
	kfree(in);
err_alloc:
	umems_destroy(ndev, mvq);
	return err;
}

static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};

	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
		return;
	}
	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
	umems_destroy(ndev, mvq);
}

static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
}

static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
}
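/* The fw flag in get_qpn()/get_rqpn() selects which end of the RC pair is
 * being operated on: when modifying the firmware QP, the driver QP is the
 * remote peer, and vice versa. Note also that create_virtqueue() above picks
 * the event delivery mode per queue: MSI-X when a dynamic vector was obtained
 * (mvq->map.virq), otherwise the firmware-QP-based notification path.
 */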
static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
			int *outlen, u32 qpn, u32 rqpn)
{
	void *qpc;
	void *pp;

	switch (cmd) {
	case MLX5_CMD_OP_2RST_QP:
		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(*outlen, GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
		break;
	case MLX5_CMD_OP_RST2INIT_QP:
		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		MLX5_SET(qpc, qpc, rwe, 1);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, vhca_port_num, 1);
		break;
	case MLX5_CMD_OP_INIT2RTR_QP:
		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
		MLX5_SET(qpc, qpc, log_msg_max, 30);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, fl, 1);
		break;
	case MLX5_CMD_OP_RTR2RTS_QP:
		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, ack_timeout, 14);
		MLX5_SET(qpc, qpc, retry_count, 7);
		MLX5_SET(qpc, qpc, rnr_retry, 7);
		break;
	default:
		goto outerr_nullify;
	}

	return;

outerr:
	kfree(*in);
	kfree(*out);
outerr_nullify:
	*in = NULL;
	*out = NULL;
}

static void free_inout(void *in, void *out)
{
	kfree(in);
	kfree(out);
}
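/* The commands prepared above implement the usual RC connection sequence:
 * each QP is walked RESET -> INIT -> RTR -> RTS, with the peer's QP number
 * programmed as remote_qpn. connect_qps() below applies this to both QPs of a
 * virtqueue; only the firmware QP is moved all the way to RTS, presumably
 * because only the firmware end transmits on this connection.
 */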
/* Two QPs are used by each virtqueue. One is used by the driver and one by
 * firmware. The fw argument indicates whether the subjected QP is the one used
 * by firmware.
 */
static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
{
	int outlen;
	int inlen;
	void *out;
	void *in;
	int err;

	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
	if (!in || !out)
		return -ENOMEM;

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
	free_inout(in, out);
	return err;
}

static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
	if (err)
		return err;

	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
}

struct mlx5_virtq_attr {
	u8 state;
	u16 available_index;
	u16 used_index;
};

static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
			   struct mlx5_virtq_attr *attr)
{
	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
	void *out;
	void *obj_context;
	void *cmd_hdr;
	int err;

	out = kzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
	if (err)
		goto err_cmd;

	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
	memset(attr, 0, sizeof(*attr));
	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
	kfree(out);
	return 0;

err_cmd:
	kfree(out);
	return err;
}

static bool is_valid_state_change(int oldstate, int newstate)
{
	switch (oldstate) {
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
	default:
		return false;
	}
}
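/* is_valid_state_change() encodes the firmware virtqueue object state machine
 * as driven by this driver: INIT -> RDY -> SUSPEND, with no transitions
 * accepted out of SUSPEND or ERR. modify_virtqueue() below rejects anything
 * else before issuing the MODIFY command.
 */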
static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *in;
	int err;

	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
		return 0;

	if (!is_valid_state_change(mvq->fw_state, state))
		return -EINVAL;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
		   MLX5_VIRTQ_MODIFY_MASK_STATE);
	MLX5_SET(virtio_net_q_object, obj_context, state, state);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (!err)
		mvq->fw_state = state;

	return err;
}

static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
	void *cmd_hdr;
	int err;

	if (!counters_supported(&ndev->mvdev))
		return 0;

	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	return 0;
}

static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};

	if (!counters_supported(&ndev->mvdev))
		return;

	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
}

static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
{
	struct vdpa_callback *cb = priv;

	if (cb->callback)
		return cb->callback(cb->private);

	return IRQ_HANDLED;
}

static void alloc_vector(struct mlx5_vdpa_net *ndev,
			 struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
	struct mlx5_vdpa_irq_pool_entry *ent;
	int err;
	int i;

	for (i = 0; i < irqp->num_ent; i++) {
		ent = &irqp->entries[i];
		if (!ent->used) {
			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
			ent->dev_id = &ndev->event_cbs[mvq->index];
			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
					  ent->name, ent->dev_id);
			if (err)
				return;

			ent->used = true;
			mvq->map = ent->map;
			return;
		}
	}
}
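/* alloc_vector() is best effort: if no free pool entry exists or request_irq()
 * fails, mvq->map is simply not populated and create_virtqueue() falls back
 * to the QP-based event mode for that queue, so running out of vectors does
 * not fail virtqueue setup.
 */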
static void dealloc_vector(struct mlx5_vdpa_net *ndev,
			   struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
	int i;

	for (i = 0; i < irqp->num_ent; i++)
		if (mvq->map.virq == irqp->entries[i].map.virq) {
			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
			irqp->entries[i].used = false;
			return;
		}
}

static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u16 idx = mvq->index;
	int err;

	if (!mvq->num_ent)
		return 0;

	if (mvq->initialized)
		return 0;

	err = cq_create(ndev, idx, mvq->num_ent);
	if (err)
		return err;

	err = qp_create(ndev, mvq, &mvq->fwqp);
	if (err)
		goto err_fwqp;

	err = qp_create(ndev, mvq, &mvq->vqqp);
	if (err)
		goto err_vqqp;

	err = connect_qps(ndev, mvq);
	if (err)
		goto err_connect;

	err = counter_set_alloc(ndev, mvq);
	if (err)
		goto err_connect;

	alloc_vector(ndev, mvq);
	err = create_virtqueue(ndev, mvq);
	if (err)
		goto err_vq;

	if (mvq->ready) {
		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
		if (err) {
			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
				       idx, err);
			goto err_modify;
		}
	}

	mvq->initialized = true;
	return 0;

err_modify:
	destroy_virtqueue(ndev, mvq);
err_vq:
	dealloc_vector(ndev, mvq);
	counter_set_dealloc(ndev, mvq);
err_connect:
	qp_destroy(ndev, &mvq->vqqp);
err_vqqp:
	qp_destroy(ndev, &mvq->fwqp);
err_fwqp:
	cq_destroy(ndev, idx);
	return err;
}

static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_virtq_attr attr;

	if (!mvq->initialized)
		return;

	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
		return;

	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");

	if (query_virtqueue(ndev, mvq, &attr)) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
		return;
	}
	mvq->avail_idx = attr.available_index;
	mvq->used_idx = attr.used_index;
}

static void suspend_vqs(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++)
		suspend_vq(ndev, &ndev->vqs[i]);
}

static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	if (!mvq->initialized)
		return;

	suspend_vq(ndev, mvq);
	destroy_virtqueue(ndev, mvq);
	dealloc_vector(ndev, mvq);
	counter_set_dealloc(ndev, mvq);
	qp_destroy(ndev, &mvq->vqqp);
	qp_destroy(ndev, &mvq->fwqp);
	cq_destroy(ndev, mvq->index);
	mvq->initialized = false;
}
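/* The RQT created below is the indirection table that the TIR hashes into.
 * Its maximum size is rqt_size rounded up to a power of two, while
 * rqt_actual_size follows the current number of queue pairs (also rounded up
 * to a power of two); only receive virtqueues are listed (even indices, see
 * vq_is_tx()), which is why the population loops step j by 2.
 */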
static int create_rqt(struct mlx5_vdpa_net *ndev)
{
	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
	__be32 *list;
	void *rqtc;
	int inlen;
	void *in;
	int i, j;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
	for (i = 0, j = 0; i < act_sz; i++, j += 2)
		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
	kfree(in);
	if (err)
		return err;

	return 0;
}

#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)

static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
{
	int act_sz = roundup_pow_of_two(num / 2);
	__be32 *list;
	void *rqtc;
	int inlen;
	void *in;
	int i, j;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);

	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
	for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
	kfree(in);
	if (err)
		return err;

	return 0;
}

static void destroy_rqt(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
}

static int create_tir(struct mlx5_vdpa_net *ndev)
{
#define HASH_IP_L4PORTS \
	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \
	 MLX5_HASH_FIELD_SEL_L4_DPORT)
	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
	void *rss_key;
	void *outer;
	void *tirc;
	void *in;
	int err;

	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);

	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));

	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);

	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);

	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
	kfree(in);
	if (err)
		return err;

	mlx5_vdpa_add_tirn(ndev);
	return err;
}
static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_remove_tirn(ndev);
	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}

#define MAX_STEERING_ENT 0x8000
#define MAX_STEERING_GROUPS 2

#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	#define NUM_DESTS 2
#else
	#define NUM_DESTS 1
#endif

static int add_steering_counters(struct mlx5_vdpa_net *ndev,
				 struct macvlan_node *node,
				 struct mlx5_flow_act *flow_act,
				 struct mlx5_flow_destination *dests)
{
#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	int err;

	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
	if (IS_ERR(node->ucast_counter.counter))
		return PTR_ERR(node->ucast_counter.counter);

	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
	if (IS_ERR(node->mcast_counter.counter)) {
		err = PTR_ERR(node->mcast_counter.counter);
		goto err_mcast_counter;
	}

	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
	return 0;

err_mcast_counter:
	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
	return err;
#else
	return 0;
#endif
}

static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
				     struct macvlan_node *node)
{
#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
#endif
}
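/* Each macvlan node installed below gets two flow rules: a unicast rule that
 * matches the full destination MAC (plus the VLAN id for tagged entries), and
 * a multicast rule that matches only the multicast bit of the DMAC. When
 * CONFIG_MLX5_VDPA_STEERING_DEBUG is enabled, a flow counter is attached to
 * each rule as a second destination for debugging.
 */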
static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
					struct macvlan_node *node)
{
	struct mlx5_flow_destination dests[NUM_DESTS] = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *spec;
	void *headers_c;
	void *headers_v;
	u8 *dmac_c;
	u8 *dmac_v;
	int err;
	u16 vid;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	vid = key2vid(node->macvlan);
	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
	eth_broadcast_addr(dmac_c);
	ether_addr_copy(dmac_v, mac);
	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
	}
	if (node->tagged) {
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
	}
	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	dests[0].tir_num = ndev->res.tirn;
	err = add_steering_counters(ndev, node, &flow_act, dests);
	if (err)
		goto out_free;

#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
#endif
	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
	if (IS_ERR(node->ucast_rule)) {
		err = PTR_ERR(node->ucast_rule);
		goto err_ucast;
	}

#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
#endif

	memset(dmac_c, 0, ETH_ALEN);
	memset(dmac_v, 0, ETH_ALEN);
	dmac_c[0] = 1;
	dmac_v[0] = 1;
	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
	if (IS_ERR(node->mcast_rule)) {
		err = PTR_ERR(node->mcast_rule);
		goto err_mcast;
	}
	kvfree(spec);
	mlx5_vdpa_add_rx_counters(ndev, node);
	return 0;

err_mcast:
	mlx5_del_flow_rules(node->ucast_rule);
err_ucast:
	remove_steering_counters(ndev, node);
out_free:
	kvfree(spec);
	return err;
}

static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
					 struct macvlan_node *node)
{
	mlx5_vdpa_remove_rx_counters(ndev, node);
	mlx5_del_flow_rules(node->ucast_rule);
	mlx5_del_flow_rules(node->mcast_rule);
}

static u64 search_val(u8 *mac, u16 vlan, bool tagged)
{
	u64 val;

	if (!tagged)
		vlan = MLX5V_UNTAGGED;

	val = (u64)vlan << 48 |
	      (u64)mac[0] << 40 |
	      (u64)mac[1] << 32 |
	      (u64)mac[2] << 24 |
	      (u64)mac[3] << 16 |
	      (u64)mac[4] << 8 |
	      (u64)mac[5];

	return val;
}

static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
{
	struct macvlan_node *pos;
	u32 idx;

	idx = hash_64(value, 8); // tbd 8
	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
		if (pos->macvlan == value)
			return pos;
	}
	return NULL;
}

static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
{
	struct macvlan_node *ptr;
	u64 val;
	u32 idx;
	int err;

	val = search_val(mac, vid, tagged);
	if (mac_vlan_lookup(ndev, val))
		return -EEXIST;

	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
	if (!ptr)
		return -ENOMEM;

	ptr->tagged = tagged;
	ptr->macvlan = val;
	ptr->ndev = ndev;
	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
	if (err)
		goto err_add;

	idx = hash_64(val, 8);
	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
	return 0;

err_add:
	kfree(ptr);
	return err;
}

static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
{
	struct macvlan_node *ptr;

	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
	if (!ptr)
		return;

	hlist_del(&ptr->hlist);
	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
	remove_steering_counters(ndev, ptr);
	kfree(ptr);
}

static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
{
	struct macvlan_node *pos;
	struct hlist_node *n;
	int i;

	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
			hlist_del(&pos->hlist);
			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
			remove_steering_counters(ndev, pos);
			kfree(pos);
		}
	}
}
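/* The macvlan hash key packs the VLAN id into bits 48+ and the MAC into the
 * low 48 bits (see search_val() above); untagged entries use the
 * MLX5V_UNTAGGED sentinel, which cannot collide with a real 12-bit VLAN id.
 * Buckets are selected with hash_64(value, 8), which assumes the table
 * (MLX5V_MACVLAN_SIZE, defined in the header) has 2^8 buckets.
 */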
static int setup_steering(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *ns;
	int err;

	ft_attr.max_fte = MAX_STEERING_ENT;
	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;

	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
	if (!ns) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
		return -EOPNOTSUPP;
	}

	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(ndev->rxft)) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
		return PTR_ERR(ndev->rxft);
	}
	mlx5_vdpa_add_rx_flow_table(ndev);

	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
	if (err)
		goto err_add;

	return 0;

err_add:
	mlx5_vdpa_remove_rx_flow_table(ndev);
	mlx5_destroy_flow_table(ndev->rxft);
	return err;
}

static void teardown_steering(struct mlx5_vdpa_net *ndev)
{
	clear_mac_vlan_table(ndev);
	mlx5_vdpa_remove_rx_flow_table(ndev);
	mlx5_destroy_flow_table(ndev->rxft);
}

static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct mlx5_core_dev *pfmdev;
	size_t read;
	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];

	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
	switch (cmd) {
	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
		if (read != ETH_ALEN)
			break;

		if (!memcmp(ndev->config.mac, mac, 6)) {
			status = VIRTIO_NET_OK;
			break;
		}

		if (is_zero_ether_addr(mac))
			break;

		if (!is_zero_ether_addr(ndev->config.mac)) {
			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
					       ndev->config.mac);
				break;
			}
		}

		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
				       mac);
			break;
		}

		/* Back up the original MAC address so that we can restore it
		 * if adding the forwarding rules fails.
		 */
		memcpy(mac_back, ndev->config.mac, ETH_ALEN);

		memcpy(ndev->config.mac, mac, ETH_ALEN);

		/* The flow table entry needs to be recreated so that packets
		 * are forwarded again.
		 */
		mac_vlan_del(ndev, mac_back, 0, false);

		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");

			/* This path is unlikely, but double-check anyway. */
			if (is_zero_ether_addr(mac_back)) {
				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
				break;
			}

			/* Try to restore the original MAC address to the MPFS
			 * table, and try to restore the forwarding rule entry.
			 */
1818 */ 1819 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) { 1820 mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n", 1821 ndev->config.mac); 1822 } 1823 1824 if (mlx5_mpfs_add_mac(pfmdev, mac_back)) { 1825 mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n", 1826 mac_back); 1827 } 1828 1829 memcpy(ndev->config.mac, mac_back, ETH_ALEN); 1830 1831 if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) 1832 mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n"); 1833 1834 break; 1835 } 1836 1837 status = VIRTIO_NET_OK; 1838 break; 1839 1840 default: 1841 break; 1842 } 1843 1844 return status; 1845 } 1846 1847 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps) 1848 { 1849 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1850 int cur_qps = ndev->cur_num_vqs / 2; 1851 int err; 1852 int i; 1853 1854 if (cur_qps > newqps) { 1855 err = modify_rqt(ndev, 2 * newqps); 1856 if (err) 1857 return err; 1858 1859 for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--) 1860 teardown_vq(ndev, &ndev->vqs[i]); 1861 1862 ndev->cur_num_vqs = 2 * newqps; 1863 } else { 1864 ndev->cur_num_vqs = 2 * newqps; 1865 for (i = cur_qps * 2; i < 2 * newqps; i++) { 1866 err = setup_vq(ndev, &ndev->vqs[i]); 1867 if (err) 1868 goto clean_added; 1869 } 1870 err = modify_rqt(ndev, 2 * newqps); 1871 if (err) 1872 goto clean_added; 1873 } 1874 return 0; 1875 1876 clean_added: 1877 for (--i; i >= 2 * cur_qps; --i) 1878 teardown_vq(ndev, &ndev->vqs[i]); 1879 1880 ndev->cur_num_vqs = 2 * cur_qps; 1881 1882 return err; 1883 } 1884 1885 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd) 1886 { 1887 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1888 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1889 struct mlx5_control_vq *cvq = &mvdev->cvq; 1890 struct virtio_net_ctrl_mq mq; 1891 size_t read; 1892 u16 newqps; 1893 1894 switch (cmd) { 1895 case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: 1896 /* This mq feature check aligns with pre-existing userspace 1897 * implementation. 1898 * 1899 * Without it, an untrusted driver could fake a multiqueue config 1900 * request down to a non-mq device that may cause kernel to 1901 * panic due to uninitialized resources for extra vqs. Even with 1902 * a well behaving guest driver, it is not expected to allow 1903 * changing the number of vqs on a non-mq device. 
1904 */ 1905 if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) 1906 break; 1907 1908 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq)); 1909 if (read != sizeof(mq)) 1910 break; 1911 1912 newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs); 1913 if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 1914 newqps > ndev->rqt_size) 1915 break; 1916 1917 if (ndev->cur_num_vqs == 2 * newqps) { 1918 status = VIRTIO_NET_OK; 1919 break; 1920 } 1921 1922 if (!change_num_qps(mvdev, newqps)) 1923 status = VIRTIO_NET_OK; 1924 1925 break; 1926 default: 1927 break; 1928 } 1929 1930 return status; 1931 } 1932 1933 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd) 1934 { 1935 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1936 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1937 struct mlx5_control_vq *cvq = &mvdev->cvq; 1938 __virtio16 vlan; 1939 size_t read; 1940 u16 id; 1941 1942 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN))) 1943 return status; 1944 1945 switch (cmd) { 1946 case VIRTIO_NET_CTRL_VLAN_ADD: 1947 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 1948 if (read != sizeof(vlan)) 1949 break; 1950 1951 id = mlx5vdpa16_to_cpu(mvdev, vlan); 1952 if (mac_vlan_add(ndev, ndev->config.mac, id, true)) 1953 break; 1954 1955 status = VIRTIO_NET_OK; 1956 break; 1957 case VIRTIO_NET_CTRL_VLAN_DEL: 1958 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 1959 if (read != sizeof(vlan)) 1960 break; 1961 1962 id = mlx5vdpa16_to_cpu(mvdev, vlan); 1963 mac_vlan_del(ndev, ndev->config.mac, id, true); 1964 status = VIRTIO_NET_OK; 1965 break; 1966 default: 1967 break; 1968 } 1969 1970 return status; 1971 } 1972 1973 static void mlx5_cvq_kick_handler(struct work_struct *work) 1974 { 1975 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1976 struct virtio_net_ctrl_hdr ctrl; 1977 struct mlx5_vdpa_wq_ent *wqent; 1978 struct mlx5_vdpa_dev *mvdev; 1979 struct mlx5_control_vq *cvq; 1980 struct mlx5_vdpa_net *ndev; 1981 size_t read, write; 1982 int err; 1983 1984 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 1985 mvdev = wqent->mvdev; 1986 ndev = to_mlx5_vdpa_ndev(mvdev); 1987 cvq = &mvdev->cvq; 1988 1989 down_write(&ndev->reslock); 1990 1991 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 1992 goto out; 1993 1994 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 1995 goto out; 1996 1997 if (!cvq->ready) 1998 goto out; 1999 2000 while (true) { 2001 err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head, 2002 GFP_ATOMIC); 2003 if (err <= 0) 2004 break; 2005 2006 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl)); 2007 if (read != sizeof(ctrl)) 2008 break; 2009 2010 cvq->received_desc++; 2011 switch (ctrl.class) { 2012 case VIRTIO_NET_CTRL_MAC: 2013 status = handle_ctrl_mac(mvdev, ctrl.cmd); 2014 break; 2015 case VIRTIO_NET_CTRL_MQ: 2016 status = handle_ctrl_mq(mvdev, ctrl.cmd); 2017 break; 2018 case VIRTIO_NET_CTRL_VLAN: 2019 status = handle_ctrl_vlan(mvdev, ctrl.cmd); 2020 break; 2021 default: 2022 break; 2023 } 2024 2025 /* Make sure data is written before advancing index */ 2026 smp_wmb(); 2027 2028 write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status)); 2029 vringh_complete_iotlb(&cvq->vring, cvq->head, write); 2030 vringh_kiov_cleanup(&cvq->riov); 2031 vringh_kiov_cleanup(&cvq->wiov); 2032 2033 if (vringh_need_notify_iotlb(&cvq->vring)) 2034 vringh_notify(&cvq->vring); 2035 2036 
cvq->completed_desc++; 2037 queue_work(mvdev->wq, &wqent->work); 2038 break; 2039 } 2040 2041 out: 2042 up_write(&ndev->reslock); 2043 } 2044 2045 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) 2046 { 2047 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2048 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2049 struct mlx5_vdpa_virtqueue *mvq; 2050 2051 if (!is_index_valid(mvdev, idx)) 2052 return; 2053 2054 if (unlikely(is_ctrl_vq_idx(mvdev, idx))) { 2055 if (!mvdev->wq || !mvdev->cvq.ready) 2056 return; 2057 2058 queue_work(mvdev->wq, &ndev->cvq_ent.work); 2059 return; 2060 } 2061 2062 mvq = &ndev->vqs[idx]; 2063 if (unlikely(!mvq->ready)) 2064 return; 2065 2066 iowrite16(idx, ndev->mvdev.res.kick_addr); 2067 } 2068 2069 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area, 2070 u64 driver_area, u64 device_area) 2071 { 2072 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2073 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2074 struct mlx5_vdpa_virtqueue *mvq; 2075 2076 if (!is_index_valid(mvdev, idx)) 2077 return -EINVAL; 2078 2079 if (is_ctrl_vq_idx(mvdev, idx)) { 2080 mvdev->cvq.desc_addr = desc_area; 2081 mvdev->cvq.device_addr = device_area; 2082 mvdev->cvq.driver_addr = driver_area; 2083 return 0; 2084 } 2085 2086 mvq = &ndev->vqs[idx]; 2087 mvq->desc_addr = desc_area; 2088 mvq->device_addr = device_area; 2089 mvq->driver_addr = driver_area; 2090 return 0; 2091 } 2092 2093 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) 2094 { 2095 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2096 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2097 struct mlx5_vdpa_virtqueue *mvq; 2098 2099 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) 2100 return; 2101 2102 mvq = &ndev->vqs[idx]; 2103 mvq->num_ent = num; 2104 } 2105 2106 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb) 2107 { 2108 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2109 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2110 2111 ndev->event_cbs[idx] = *cb; 2112 if (is_ctrl_vq_idx(mvdev, idx)) 2113 mvdev->cvq.event_cb = *cb; 2114 } 2115 2116 static void mlx5_cvq_notify(struct vringh *vring) 2117 { 2118 struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring); 2119 2120 if (!cvq->event_cb.callback) 2121 return; 2122 2123 cvq->event_cb.callback(cvq->event_cb.private); 2124 } 2125 2126 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready) 2127 { 2128 struct mlx5_control_vq *cvq = &mvdev->cvq; 2129 2130 cvq->ready = ready; 2131 if (!ready) 2132 return; 2133 2134 cvq->vring.notify = mlx5_cvq_notify; 2135 } 2136 2137 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready) 2138 { 2139 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2140 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2141 struct mlx5_vdpa_virtqueue *mvq; 2142 int err; 2143 2144 if (!mvdev->actual_features) 2145 return; 2146 2147 if (!is_index_valid(mvdev, idx)) 2148 return; 2149 2150 if (is_ctrl_vq_idx(mvdev, idx)) { 2151 set_cvq_ready(mvdev, ready); 2152 return; 2153 } 2154 2155 mvq = &ndev->vqs[idx]; 2156 if (!ready) { 2157 suspend_vq(ndev, mvq); 2158 } else { 2159 err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); 2160 if (err) { 2161 mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err); 2162 ready = false; 2163 } 2164 } 2165 2166 2167 mvq->ready = ready; 2168 } 2169 2170 static bool 
mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx) 2171 { 2172 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2173 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2174 2175 if (!is_index_valid(mvdev, idx)) 2176 return false; 2177 2178 if (is_ctrl_vq_idx(mvdev, idx)) 2179 return mvdev->cvq.ready; 2180 2181 return ndev->vqs[idx].ready; 2182 } 2183 2184 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, 2185 const struct vdpa_vq_state *state) 2186 { 2187 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2188 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2189 struct mlx5_vdpa_virtqueue *mvq; 2190 2191 if (!is_index_valid(mvdev, idx)) 2192 return -EINVAL; 2193 2194 if (is_ctrl_vq_idx(mvdev, idx)) { 2195 mvdev->cvq.vring.last_avail_idx = state->split.avail_index; 2196 return 0; 2197 } 2198 2199 mvq = &ndev->vqs[idx]; 2200 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) { 2201 mlx5_vdpa_warn(mvdev, "can't modify available index\n"); 2202 return -EINVAL; 2203 } 2204 2205 mvq->used_idx = state->split.avail_index; 2206 mvq->avail_idx = state->split.avail_index; 2207 return 0; 2208 } 2209 2210 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state) 2211 { 2212 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2213 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2214 struct mlx5_vdpa_virtqueue *mvq; 2215 struct mlx5_virtq_attr attr; 2216 int err; 2217 2218 if (!is_index_valid(mvdev, idx)) 2219 return -EINVAL; 2220 2221 if (is_ctrl_vq_idx(mvdev, idx)) { 2222 state->split.avail_index = mvdev->cvq.vring.last_avail_idx; 2223 return 0; 2224 } 2225 2226 mvq = &ndev->vqs[idx]; 2227 /* If the virtq object was destroyed, use the value saved at 2228 * the last minute of suspend_vq. This caters for userspace 2229 * that cares about emulating the index after vq is stopped. 2230 */ 2231 if (!mvq->initialized) { 2232 /* Firmware returns a wrong value for the available index. 2233 * Since both values should be identical, we take the value of 2234 * used_idx which is reported correctly. 
2235 */ 2236 state->split.avail_index = mvq->used_idx; 2237 return 0; 2238 } 2239 2240 err = query_virtqueue(ndev, mvq, &attr); 2241 if (err) { 2242 mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n"); 2243 return err; 2244 } 2245 state->split.avail_index = attr.used_index; 2246 return 0; 2247 } 2248 2249 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev) 2250 { 2251 return PAGE_SIZE; 2252 } 2253 2254 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx) 2255 { 2256 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2257 2258 if (is_ctrl_vq_idx(mvdev, idx)) 2259 return MLX5_VDPA_CVQ_GROUP; 2260 2261 return MLX5_VDPA_DATAVQ_GROUP; 2262 } 2263 2264 static u64 mlx_to_vritio_features(u16 dev_features) 2265 { 2266 u64 result = 0; 2267 2268 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF)) 2269 result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 2270 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN)) 2271 result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN); 2272 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN)) 2273 result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN); 2274 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6)) 2275 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6); 2276 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4)) 2277 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4); 2278 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM)) 2279 result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM); 2280 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM)) 2281 result |= BIT_ULL(VIRTIO_NET_F_CSUM); 2282 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6)) 2283 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6); 2284 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4)) 2285 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4); 2286 2287 return result; 2288 } 2289 2290 static u64 get_supported_features(struct mlx5_core_dev *mdev) 2291 { 2292 u64 mlx_vdpa_features = 0; 2293 u16 dev_features; 2294 2295 dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask); 2296 mlx_vdpa_features |= mlx_to_vritio_features(dev_features); 2297 if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0)) 2298 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1); 2299 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM); 2300 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ); 2301 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); 2302 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ); 2303 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS); 2304 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU); 2305 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN); 2306 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC); 2307 2308 return mlx_vdpa_features; 2309 } 2310 2311 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev) 2312 { 2313 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2314 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2315 2316 print_features(mvdev, ndev->mvdev.mlx_features, false); 2317 return ndev->mvdev.mlx_features; 2318 } 2319 2320 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) 2321 { 2322 /* Minimum features to expect */ 2323 if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) 2324 return -EOPNOTSUPP; 2325 2326 /* Double check features combination sent down by the driver. 2327 * Fail invalid features due to absence of the depended feature. 2328 * 2329 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit 2330 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ". 
2331 * By failing the invalid features sent down by untrusted drivers, 2332 * we're assured the assumption made upon is_index_valid() and 2333 * is_ctrl_vq_idx() will not be compromised. 2334 */ 2335 if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) == 2336 BIT_ULL(VIRTIO_NET_F_MQ)) 2337 return -EINVAL; 2338 2339 return 0; 2340 } 2341 2342 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) 2343 { 2344 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2345 int err; 2346 int i; 2347 2348 for (i = 0; i < mvdev->max_vqs; i++) { 2349 err = setup_vq(ndev, &ndev->vqs[i]); 2350 if (err) 2351 goto err_vq; 2352 } 2353 2354 return 0; 2355 2356 err_vq: 2357 for (--i; i >= 0; i--) 2358 teardown_vq(ndev, &ndev->vqs[i]); 2359 2360 return err; 2361 } 2362 2363 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev) 2364 { 2365 struct mlx5_vdpa_virtqueue *mvq; 2366 int i; 2367 2368 for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) { 2369 mvq = &ndev->vqs[i]; 2370 if (!mvq->initialized) 2371 continue; 2372 2373 teardown_vq(ndev, mvq); 2374 } 2375 } 2376 2377 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev) 2378 { 2379 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) { 2380 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) { 2381 /* MQ supported. CVQ index is right above the last data virtqueue's */ 2382 mvdev->max_idx = mvdev->max_vqs; 2383 } else { 2384 /* Only CVQ supportted. data virtqueues occupy indices 0 and 1. 2385 * CVQ gets index 2 2386 */ 2387 mvdev->max_idx = 2; 2388 } 2389 } else { 2390 /* Two data virtqueues only: one for rx and one for tx */ 2391 mvdev->max_idx = 1; 2392 } 2393 } 2394 2395 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) 2396 { 2397 u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {}; 2398 u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {}; 2399 int err; 2400 2401 MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); 2402 MLX5_SET(query_vport_state_in, in, op_mod, opmod); 2403 MLX5_SET(query_vport_state_in, in, vport_number, vport); 2404 if (vport) 2405 MLX5_SET(query_vport_state_in, in, other_vport, 1); 2406 2407 err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out); 2408 if (err) 2409 return 0; 2410 2411 return MLX5_GET(query_vport_state_out, out, state); 2412 } 2413 2414 static bool get_link_state(struct mlx5_vdpa_dev *mvdev) 2415 { 2416 if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) == 2417 VPORT_STATE_UP) 2418 return true; 2419 2420 return false; 2421 } 2422 2423 static void update_carrier(struct work_struct *work) 2424 { 2425 struct mlx5_vdpa_wq_ent *wqent; 2426 struct mlx5_vdpa_dev *mvdev; 2427 struct mlx5_vdpa_net *ndev; 2428 2429 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 2430 mvdev = wqent->mvdev; 2431 ndev = to_mlx5_vdpa_ndev(mvdev); 2432 if (get_link_state(mvdev)) 2433 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); 2434 else 2435 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); 2436 2437 if (ndev->config_cb.callback) 2438 ndev->config_cb.callback(ndev->config_cb.private); 2439 2440 kfree(wqent); 2441 } 2442 2443 static int queue_link_work(struct mlx5_vdpa_net *ndev) 2444 { 2445 struct mlx5_vdpa_wq_ent *wqent; 2446 2447 wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); 2448 if (!wqent) 2449 return -ENOMEM; 2450 2451 wqent->mvdev = &ndev->mvdev; 2452 INIT_WORK(&wqent->work, update_carrier); 2453 queue_work(ndev->mvdev.wq, &wqent->work); 2454 return 0; 2455 } 2456 2457 static int 
event_handler(struct notifier_block *nb, unsigned long event, void *param) 2458 { 2459 struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb); 2460 struct mlx5_eqe *eqe = param; 2461 int ret = NOTIFY_DONE; 2462 2463 if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { 2464 switch (eqe->sub_type) { 2465 case MLX5_PORT_CHANGE_SUBTYPE_DOWN: 2466 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: 2467 if (queue_link_work(ndev)) 2468 return NOTIFY_DONE; 2469 2470 ret = NOTIFY_OK; 2471 break; 2472 default: 2473 return NOTIFY_DONE; 2474 } 2475 return ret; 2476 } 2477 return ret; 2478 } 2479 2480 static void register_link_notifier(struct mlx5_vdpa_net *ndev) 2481 { 2482 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS))) 2483 return; 2484 2485 ndev->nb.notifier_call = event_handler; 2486 mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb); 2487 ndev->nb_registered = true; 2488 queue_link_work(ndev); 2489 } 2490 2491 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev) 2492 { 2493 if (!ndev->nb_registered) 2494 return; 2495 2496 ndev->nb_registered = false; 2497 mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb); 2498 if (ndev->mvdev.wq) 2499 flush_workqueue(ndev->mvdev.wq); 2500 } 2501 2502 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) 2503 { 2504 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2505 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2506 int err; 2507 2508 print_features(mvdev, features, true); 2509 2510 err = verify_driver_features(mvdev, features); 2511 if (err) 2512 return err; 2513 2514 ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; 2515 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)) 2516 ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs); 2517 else 2518 ndev->rqt_size = 1; 2519 2520 ndev->cur_num_vqs = 2 * ndev->rqt_size; 2521 2522 update_cvq_info(mvdev); 2523 return err; 2524 } 2525 2526 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb) 2527 { 2528 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2529 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2530 2531 ndev->config_cb = *cb; 2532 } 2533 2534 #define MLX5_VDPA_MAX_VQ_ENTRIES 256 2535 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev) 2536 { 2537 return MLX5_VDPA_MAX_VQ_ENTRIES; 2538 } 2539 2540 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev) 2541 { 2542 return VIRTIO_ID_NET; 2543 } 2544 2545 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev) 2546 { 2547 return PCI_VENDOR_ID_MELLANOX; 2548 } 2549 2550 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev) 2551 { 2552 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2553 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2554 2555 print_status(mvdev, ndev->mvdev.status, false); 2556 return ndev->mvdev.status; 2557 } 2558 2559 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 2560 { 2561 struct mlx5_vq_restore_info *ri = &mvq->ri; 2562 struct mlx5_virtq_attr attr = {}; 2563 int err; 2564 2565 if (mvq->initialized) { 2566 err = query_virtqueue(ndev, mvq, &attr); 2567 if (err) 2568 return err; 2569 } 2570 2571 ri->avail_index = attr.available_index; 2572 ri->used_index = attr.used_index; 2573 ri->ready = mvq->ready; 2574 ri->num_ent = mvq->num_ent; 2575 ri->desc_addr = mvq->desc_addr; 2576 ri->device_addr = mvq->device_addr; 2577 ri->driver_addr = mvq->driver_addr; 2578 ri->map = mvq->map; 2579 ri->restore = true; 2580 return 0; 2581 } 2582 
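/*
 * Illustration only -- not driver code.  The feature negotiation handled
 * above by verify_driver_features(), update_cvq_info() and
 * mlx5_vdpa_set_driver_features() results in the following virtqueue index
 * layout (restated here as a summary; nothing below adds new behaviour):
 *
 *   negotiated features        data VQ indices      CVQ index   max_idx
 *   ------------------------   -----------------    ---------   -------
 *   neither CTRL_VQ nor MQ     0 (rx), 1 (tx)       none        1
 *   CTRL_VQ without MQ         0 (rx), 1 (tx)       2           2
 *   CTRL_VQ and MQ             0 .. max_vqs - 1     max_vqs     max_vqs
 *
 * MQ without CTRL_VQ is rejected by verify_driver_features(), per the
 * VIRTIO 1.1 rule "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".  As a
 * worked example (the numbers are hypothetical): a device provisioned with
 * 4 VQ pairs has max_vqs = 8, so mlx5_vdpa_set_driver_features() computes
 * rqt_size = 4 and cur_num_vqs = 8 when MQ is negotiated; data VQs then use
 * indices 0..7 and the control VQ uses index 8.
 */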
2583 static int save_channels_info(struct mlx5_vdpa_net *ndev) 2584 { 2585 int i; 2586 2587 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2588 memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri)); 2589 save_channel_info(ndev, &ndev->vqs[i]); 2590 } 2591 return 0; 2592 } 2593 2594 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev) 2595 { 2596 int i; 2597 2598 for (i = 0; i < ndev->mvdev.max_vqs; i++) 2599 memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 2600 } 2601 2602 static void restore_channels_info(struct mlx5_vdpa_net *ndev) 2603 { 2604 struct mlx5_vdpa_virtqueue *mvq; 2605 struct mlx5_vq_restore_info *ri; 2606 int i; 2607 2608 mlx5_clear_vqs(ndev); 2609 init_mvqs(ndev); 2610 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2611 mvq = &ndev->vqs[i]; 2612 ri = &mvq->ri; 2613 if (!ri->restore) 2614 continue; 2615 2616 mvq->avail_idx = ri->avail_index; 2617 mvq->used_idx = ri->used_index; 2618 mvq->ready = ri->ready; 2619 mvq->num_ent = ri->num_ent; 2620 mvq->desc_addr = ri->desc_addr; 2621 mvq->device_addr = ri->device_addr; 2622 mvq->driver_addr = ri->driver_addr; 2623 mvq->map = ri->map; 2624 } 2625 } 2626 2627 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, 2628 struct vhost_iotlb *iotlb, unsigned int asid) 2629 { 2630 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2631 int err; 2632 2633 suspend_vqs(ndev); 2634 err = save_channels_info(ndev); 2635 if (err) 2636 goto err_mr; 2637 2638 teardown_driver(ndev); 2639 mlx5_vdpa_destroy_mr(mvdev); 2640 err = mlx5_vdpa_create_mr(mvdev, iotlb, asid); 2641 if (err) 2642 goto err_mr; 2643 2644 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended) 2645 goto err_mr; 2646 2647 restore_channels_info(ndev); 2648 err = setup_driver(mvdev); 2649 if (err) 2650 goto err_setup; 2651 2652 return 0; 2653 2654 err_setup: 2655 mlx5_vdpa_destroy_mr(mvdev); 2656 err_mr: 2657 return err; 2658 } 2659 2660 /* reslock must be held for this function */ 2661 static int setup_driver(struct mlx5_vdpa_dev *mvdev) 2662 { 2663 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2664 int err; 2665 2666 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 2667 2668 if (ndev->setup) { 2669 mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n"); 2670 err = 0; 2671 goto out; 2672 } 2673 mlx5_vdpa_add_debugfs(ndev); 2674 err = setup_virtqueues(mvdev); 2675 if (err) { 2676 mlx5_vdpa_warn(mvdev, "setup_virtqueues\n"); 2677 goto err_setup; 2678 } 2679 2680 err = create_rqt(ndev); 2681 if (err) { 2682 mlx5_vdpa_warn(mvdev, "create_rqt\n"); 2683 goto err_rqt; 2684 } 2685 2686 err = create_tir(ndev); 2687 if (err) { 2688 mlx5_vdpa_warn(mvdev, "create_tir\n"); 2689 goto err_tir; 2690 } 2691 2692 err = setup_steering(ndev); 2693 if (err) { 2694 mlx5_vdpa_warn(mvdev, "setup_steering\n"); 2695 goto err_fwd; 2696 } 2697 ndev->setup = true; 2698 2699 return 0; 2700 2701 err_fwd: 2702 destroy_tir(ndev); 2703 err_tir: 2704 destroy_rqt(ndev); 2705 err_rqt: 2706 teardown_virtqueues(ndev); 2707 err_setup: 2708 mlx5_vdpa_remove_debugfs(ndev->debugfs); 2709 out: 2710 return err; 2711 } 2712 2713 /* reslock must be held for this function */ 2714 static void teardown_driver(struct mlx5_vdpa_net *ndev) 2715 { 2716 2717 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 2718 2719 if (!ndev->setup) 2720 return; 2721 2722 mlx5_vdpa_remove_debugfs(ndev->debugfs); 2723 ndev->debugfs = NULL; 2724 teardown_steering(ndev); 2725 destroy_tir(ndev); 2726 destroy_rqt(ndev); 2727 teardown_virtqueues(ndev); 2728 ndev->setup = false; 2729 } 2730 2731 static 
void clear_vqs_ready(struct mlx5_vdpa_net *ndev) 2732 { 2733 int i; 2734 2735 for (i = 0; i < ndev->mvdev.max_vqs; i++) 2736 ndev->vqs[i].ready = false; 2737 2738 ndev->mvdev.cvq.ready = false; 2739 } 2740 2741 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) 2742 { 2743 struct mlx5_control_vq *cvq = &mvdev->cvq; 2744 int err = 0; 2745 2746 if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) 2747 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, 2748 MLX5_CVQ_MAX_ENT, false, 2749 (struct vring_desc *)(uintptr_t)cvq->desc_addr, 2750 (struct vring_avail *)(uintptr_t)cvq->driver_addr, 2751 (struct vring_used *)(uintptr_t)cvq->device_addr); 2752 2753 return err; 2754 } 2755 2756 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) 2757 { 2758 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2759 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2760 int err; 2761 2762 print_status(mvdev, status, true); 2763 2764 down_write(&ndev->reslock); 2765 2766 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { 2767 if (status & VIRTIO_CONFIG_S_DRIVER_OK) { 2768 err = setup_cvq_vring(mvdev); 2769 if (err) { 2770 mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n"); 2771 goto err_setup; 2772 } 2773 register_link_notifier(ndev); 2774 err = setup_driver(mvdev); 2775 if (err) { 2776 mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); 2777 goto err_driver; 2778 } 2779 } else { 2780 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); 2781 goto err_clear; 2782 } 2783 } 2784 2785 ndev->mvdev.status = status; 2786 up_write(&ndev->reslock); 2787 return; 2788 2789 err_driver: 2790 unregister_link_notifier(ndev); 2791 err_setup: 2792 mlx5_vdpa_destroy_mr(&ndev->mvdev); 2793 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; 2794 err_clear: 2795 up_write(&ndev->reslock); 2796 } 2797 2798 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev) 2799 { 2800 int i; 2801 2802 /* default mapping all groups are mapped to asid 0 */ 2803 for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++) 2804 mvdev->group2asid[i] = 0; 2805 } 2806 2807 static int mlx5_vdpa_reset(struct vdpa_device *vdev) 2808 { 2809 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2810 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2811 2812 print_status(mvdev, 0, true); 2813 mlx5_vdpa_info(mvdev, "performing device reset\n"); 2814 2815 down_write(&ndev->reslock); 2816 unregister_link_notifier(ndev); 2817 teardown_driver(ndev); 2818 clear_vqs_ready(ndev); 2819 mlx5_vdpa_destroy_mr(&ndev->mvdev); 2820 ndev->mvdev.status = 0; 2821 ndev->mvdev.suspended = false; 2822 ndev->cur_num_vqs = 0; 2823 ndev->mvdev.cvq.received_desc = 0; 2824 ndev->mvdev.cvq.completed_desc = 0; 2825 memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1)); 2826 ndev->mvdev.actual_features = 0; 2827 init_group_to_asid_map(mvdev); 2828 ++mvdev->generation; 2829 2830 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 2831 if (mlx5_vdpa_create_mr(mvdev, NULL, 0)) 2832 mlx5_vdpa_warn(mvdev, "create MR failed\n"); 2833 } 2834 up_write(&ndev->reslock); 2835 2836 return 0; 2837 } 2838 2839 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev) 2840 { 2841 return sizeof(struct virtio_net_config); 2842 } 2843 2844 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, 2845 unsigned int len) 2846 { 2847 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2848 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2849 2850 if (offset + len <= sizeof(struct 
virtio_net_config)) 2851 memcpy(buf, (u8 *)&ndev->config + offset, len); 2852 } 2853 2854 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, 2855 unsigned int len) 2856 { 2857 /* not supported */ 2858 } 2859 2860 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) 2861 { 2862 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2863 2864 return mvdev->generation; 2865 } 2866 2867 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, 2868 unsigned int asid) 2869 { 2870 bool change_map; 2871 int err; 2872 2873 err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid); 2874 if (err) { 2875 mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err); 2876 return err; 2877 } 2878 2879 if (change_map) 2880 err = mlx5_vdpa_change_map(mvdev, iotlb, asid); 2881 2882 return err; 2883 } 2884 2885 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, 2886 struct vhost_iotlb *iotlb) 2887 { 2888 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2889 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2890 int err = -EINVAL; 2891 2892 down_write(&ndev->reslock); 2893 err = set_map_data(mvdev, iotlb, asid); 2894 up_write(&ndev->reslock); 2895 return err; 2896 } 2897 2898 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx) 2899 { 2900 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2901 2902 if (is_ctrl_vq_idx(mvdev, idx)) 2903 return &vdev->dev; 2904 2905 return mvdev->vdev.dma_dev; 2906 } 2907 2908 static void free_irqs(struct mlx5_vdpa_net *ndev) 2909 { 2910 struct mlx5_vdpa_irq_pool_entry *ent; 2911 int i; 2912 2913 if (!msix_mode_supported(&ndev->mvdev)) 2914 return; 2915 2916 if (!ndev->irqp.entries) 2917 return; 2918 2919 for (i = ndev->irqp.num_ent - 1; i >= 0; i--) { 2920 ent = ndev->irqp.entries + i; 2921 if (ent->map.virq) 2922 pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map); 2923 } 2924 kfree(ndev->irqp.entries); 2925 } 2926 2927 static void mlx5_vdpa_free(struct vdpa_device *vdev) 2928 { 2929 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2930 struct mlx5_core_dev *pfmdev; 2931 struct mlx5_vdpa_net *ndev; 2932 2933 ndev = to_mlx5_vdpa_ndev(mvdev); 2934 2935 free_resources(ndev); 2936 mlx5_vdpa_destroy_mr(mvdev); 2937 if (!is_zero_ether_addr(ndev->config.mac)) { 2938 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); 2939 mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); 2940 } 2941 mlx5_vdpa_free_resources(&ndev->mvdev); 2942 free_irqs(ndev); 2943 kfree(ndev->event_cbs); 2944 kfree(ndev->vqs); 2945 } 2946 2947 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx) 2948 { 2949 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2950 struct vdpa_notification_area ret = {}; 2951 struct mlx5_vdpa_net *ndev; 2952 phys_addr_t addr; 2953 2954 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) 2955 return ret; 2956 2957 /* If SF BAR size is smaller than PAGE_SIZE, do not use direct 2958 * notification to avoid the risk of mapping pages that contain BAR of more 2959 * than one SF 2960 */ 2961 if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT) 2962 return ret; 2963 2964 ndev = to_mlx5_vdpa_ndev(mvdev); 2965 addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr; 2966 ret.addr = addr; 2967 ret.size = PAGE_SIZE; 2968 return ret; 2969 } 2970 2971 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx) 2972 { 2973 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2974 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2975 
struct mlx5_vdpa_virtqueue *mvq; 2976 2977 if (!is_index_valid(mvdev, idx)) 2978 return -EINVAL; 2979 2980 if (is_ctrl_vq_idx(mvdev, idx)) 2981 return -EOPNOTSUPP; 2982 2983 mvq = &ndev->vqs[idx]; 2984 if (!mvq->map.virq) 2985 return -EOPNOTSUPP; 2986 2987 return mvq->map.virq; 2988 } 2989 2990 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev) 2991 { 2992 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2993 2994 return mvdev->actual_features; 2995 } 2996 2997 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 2998 u64 *received_desc, u64 *completed_desc) 2999 { 3000 u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {}; 3001 u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {}; 3002 void *cmd_hdr; 3003 void *ctx; 3004 int err; 3005 3006 if (!counters_supported(&ndev->mvdev)) 3007 return -EOPNOTSUPP; 3008 3009 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 3010 return -EAGAIN; 3011 3012 cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr); 3013 3014 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 3015 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 3016 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 3017 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id); 3018 3019 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 3020 if (err) 3021 return err; 3022 3023 ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters); 3024 *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc); 3025 *completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc); 3026 return 0; 3027 } 3028 3029 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx, 3030 struct sk_buff *msg, 3031 struct netlink_ext_ack *extack) 3032 { 3033 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3034 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3035 struct mlx5_vdpa_virtqueue *mvq; 3036 struct mlx5_control_vq *cvq; 3037 u64 received_desc; 3038 u64 completed_desc; 3039 int err = 0; 3040 3041 down_read(&ndev->reslock); 3042 if (!is_index_valid(mvdev, idx)) { 3043 NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid"); 3044 err = -EINVAL; 3045 goto out_err; 3046 } 3047 3048 if (idx == ctrl_vq_idx(mvdev)) { 3049 cvq = &mvdev->cvq; 3050 received_desc = cvq->received_desc; 3051 completed_desc = cvq->completed_desc; 3052 goto out; 3053 } 3054 3055 mvq = &ndev->vqs[idx]; 3056 err = counter_set_query(ndev, mvq, &received_desc, &completed_desc); 3057 if (err) { 3058 NL_SET_ERR_MSG_MOD(extack, "failed to query hardware"); 3059 goto out_err; 3060 } 3061 3062 out: 3063 err = -EMSGSIZE; 3064 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc")) 3065 goto out_err; 3066 3067 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc, 3068 VDPA_ATTR_PAD)) 3069 goto out_err; 3070 3071 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc")) 3072 goto out_err; 3073 3074 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc, 3075 VDPA_ATTR_PAD)) 3076 goto out_err; 3077 3078 err = 0; 3079 out_err: 3080 up_read(&ndev->reslock); 3081 return err; 3082 } 3083 3084 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev) 3085 { 3086 struct mlx5_control_vq *cvq; 3087 3088 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 3089 return; 3090 3091 cvq = &mvdev->cvq; 3092 cvq->ready = false; 3093 } 3094 3095 
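/*
 * Illustration only -- not driver code.  mlx5_vdpa_get_vendor_vq_stats()
 * above reports two cumulative per-queue counters, "received_desc" and
 * "completed_desc": for the control VQ they come from the software counters
 * maintained in mlx5_cvq_kick_handler(), and for data VQs they are queried
 * from firmware by counter_set_query().  A minimal, hedged sketch of how a
 * consumer could interpret one snapshot of the two values (the helper name
 * is hypothetical and does not exist in this driver):
 *
 *	static u64 vq_inflight_desc(u64 received_desc, u64 completed_desc)
 *	{
 *		// Cumulative counters: the difference is the number of
 *		// descriptors the device has received but not yet completed.
 *		return received_desc - completed_desc;
 *	}
 *
 * Userspace normally retrieves these values through the vdpa netlink
 * vendor-stats interface (recent iproute2 exposes them via
 * "vdpa dev vstats show <dev> qidx <n>"; exact tooling and syntax may vary).
 */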
static int mlx5_vdpa_suspend(struct vdpa_device *vdev) 3096 { 3097 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3098 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3099 struct mlx5_vdpa_virtqueue *mvq; 3100 int i; 3101 3102 mlx5_vdpa_info(mvdev, "suspending device\n"); 3103 3104 down_write(&ndev->reslock); 3105 unregister_link_notifier(ndev); 3106 for (i = 0; i < ndev->cur_num_vqs; i++) { 3107 mvq = &ndev->vqs[i]; 3108 suspend_vq(ndev, mvq); 3109 } 3110 mlx5_vdpa_cvq_suspend(mvdev); 3111 mvdev->suspended = true; 3112 up_write(&ndev->reslock); 3113 return 0; 3114 } 3115 3116 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, 3117 unsigned int asid) 3118 { 3119 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3120 3121 if (group >= MLX5_VDPA_NUMVQ_GROUPS) 3122 return -EINVAL; 3123 3124 mvdev->group2asid[group] = asid; 3125 return 0; 3126 } 3127 3128 static const struct vdpa_config_ops mlx5_vdpa_ops = { 3129 .set_vq_address = mlx5_vdpa_set_vq_address, 3130 .set_vq_num = mlx5_vdpa_set_vq_num, 3131 .kick_vq = mlx5_vdpa_kick_vq, 3132 .set_vq_cb = mlx5_vdpa_set_vq_cb, 3133 .set_vq_ready = mlx5_vdpa_set_vq_ready, 3134 .get_vq_ready = mlx5_vdpa_get_vq_ready, 3135 .set_vq_state = mlx5_vdpa_set_vq_state, 3136 .get_vq_state = mlx5_vdpa_get_vq_state, 3137 .get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats, 3138 .get_vq_notification = mlx5_get_vq_notification, 3139 .get_vq_irq = mlx5_get_vq_irq, 3140 .get_vq_align = mlx5_vdpa_get_vq_align, 3141 .get_vq_group = mlx5_vdpa_get_vq_group, 3142 .get_device_features = mlx5_vdpa_get_device_features, 3143 .set_driver_features = mlx5_vdpa_set_driver_features, 3144 .get_driver_features = mlx5_vdpa_get_driver_features, 3145 .set_config_cb = mlx5_vdpa_set_config_cb, 3146 .get_vq_num_max = mlx5_vdpa_get_vq_num_max, 3147 .get_device_id = mlx5_vdpa_get_device_id, 3148 .get_vendor_id = mlx5_vdpa_get_vendor_id, 3149 .get_status = mlx5_vdpa_get_status, 3150 .set_status = mlx5_vdpa_set_status, 3151 .reset = mlx5_vdpa_reset, 3152 .get_config_size = mlx5_vdpa_get_config_size, 3153 .get_config = mlx5_vdpa_get_config, 3154 .set_config = mlx5_vdpa_set_config, 3155 .get_generation = mlx5_vdpa_get_generation, 3156 .set_map = mlx5_vdpa_set_map, 3157 .set_group_asid = mlx5_set_group_asid, 3158 .get_vq_dma_dev = mlx5_get_vq_dma_dev, 3159 .free = mlx5_vdpa_free, 3160 .suspend = mlx5_vdpa_suspend, 3161 }; 3162 3163 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) 3164 { 3165 u16 hw_mtu; 3166 int err; 3167 3168 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); 3169 if (err) 3170 return err; 3171 3172 *mtu = hw_mtu - MLX5V_ETH_HARD_MTU; 3173 return 0; 3174 } 3175 3176 static int alloc_resources(struct mlx5_vdpa_net *ndev) 3177 { 3178 struct mlx5_vdpa_net_resources *res = &ndev->res; 3179 int err; 3180 3181 if (res->valid) { 3182 mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n"); 3183 return -EEXIST; 3184 } 3185 3186 err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn); 3187 if (err) 3188 return err; 3189 3190 err = create_tis(ndev); 3191 if (err) 3192 goto err_tis; 3193 3194 res->valid = true; 3195 3196 return 0; 3197 3198 err_tis: 3199 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 3200 return err; 3201 } 3202 3203 static void free_resources(struct mlx5_vdpa_net *ndev) 3204 { 3205 struct mlx5_vdpa_net_resources *res = &ndev->res; 3206 3207 if (!res->valid) 3208 return; 3209 3210 destroy_tis(ndev); 3211 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 3212 res->valid = false; 3213 } 3214 3215 static void 
init_mvqs(struct mlx5_vdpa_net *ndev) 3216 { 3217 struct mlx5_vdpa_virtqueue *mvq; 3218 int i; 3219 3220 for (i = 0; i < ndev->mvdev.max_vqs; ++i) { 3221 mvq = &ndev->vqs[i]; 3222 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 3223 mvq->index = i; 3224 mvq->ndev = ndev; 3225 mvq->fwqp.fw = true; 3226 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; 3227 } 3228 for (; i < ndev->mvdev.max_vqs; i++) { 3229 mvq = &ndev->vqs[i]; 3230 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 3231 mvq->index = i; 3232 mvq->ndev = ndev; 3233 } 3234 } 3235 3236 struct mlx5_vdpa_mgmtdev { 3237 struct vdpa_mgmt_dev mgtdev; 3238 struct mlx5_adev *madev; 3239 struct mlx5_vdpa_net *ndev; 3240 }; 3241 3242 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu) 3243 { 3244 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); 3245 void *in; 3246 int err; 3247 3248 in = kvzalloc(inlen, GFP_KERNEL); 3249 if (!in) 3250 return -ENOMEM; 3251 3252 MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1); 3253 MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, 3254 mtu + MLX5V_ETH_HARD_MTU); 3255 MLX5_SET(modify_nic_vport_context_in, in, opcode, 3256 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); 3257 3258 err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); 3259 3260 kvfree(in); 3261 return err; 3262 } 3263 3264 static void allocate_irqs(struct mlx5_vdpa_net *ndev) 3265 { 3266 struct mlx5_vdpa_irq_pool_entry *ent; 3267 int i; 3268 3269 if (!msix_mode_supported(&ndev->mvdev)) 3270 return; 3271 3272 if (!ndev->mvdev.mdev->pdev) 3273 return; 3274 3275 ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL); 3276 if (!ndev->irqp.entries) 3277 return; 3278 3279 3280 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 3281 ent = ndev->irqp.entries + i; 3282 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", 3283 dev_name(&ndev->mvdev.vdev.dev), i); 3284 ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL); 3285 if (!ent->map.virq) 3286 return; 3287 3288 ndev->irqp.num_ent++; 3289 } 3290 } 3291 3292 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, 3293 const struct vdpa_dev_set_config *add_config) 3294 { 3295 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); 3296 struct virtio_net_config *config; 3297 struct mlx5_core_dev *pfmdev; 3298 struct mlx5_vdpa_dev *mvdev; 3299 struct mlx5_vdpa_net *ndev; 3300 struct mlx5_core_dev *mdev; 3301 u64 device_features; 3302 u32 max_vqs; 3303 u16 mtu; 3304 int err; 3305 3306 if (mgtdev->ndev) 3307 return -ENOSPC; 3308 3309 mdev = mgtdev->madev->mdev; 3310 device_features = mgtdev->mgtdev.supported_features; 3311 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { 3312 if (add_config->device_features & ~device_features) { 3313 dev_warn(mdev->device, 3314 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n", 3315 add_config->device_features, device_features); 3316 return -EINVAL; 3317 } 3318 device_features &= add_config->device_features; 3319 } else { 3320 device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 3321 } 3322 if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) && 3323 device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) { 3324 dev_warn(mdev->device, 3325 "Must provision minimum features 0x%llx for this device", 3326 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)); 3327 return -EOPNOTSUPP; 3328 } 3329 3330 if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, 
virtio_queue_type) & 3331 MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) { 3332 dev_warn(mdev->device, "missing support for split virtqueues\n"); 3333 return -EOPNOTSUPP; 3334 } 3335 3336 max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues), 3337 1 << MLX5_CAP_GEN(mdev, log_max_rqt_size)); 3338 if (max_vqs < 2) { 3339 dev_warn(mdev->device, 3340 "%d virtqueues are supported. At least 2 are required\n", 3341 max_vqs); 3342 return -EAGAIN; 3343 } 3344 3345 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) { 3346 if (add_config->net.max_vq_pairs > max_vqs / 2) 3347 return -EINVAL; 3348 max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs); 3349 } else { 3350 max_vqs = 2; 3351 } 3352 3353 ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, 3354 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false); 3355 if (IS_ERR(ndev)) 3356 return PTR_ERR(ndev); 3357 3358 ndev->mvdev.max_vqs = max_vqs; 3359 mvdev = &ndev->mvdev; 3360 mvdev->mdev = mdev; 3361 3362 ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL); 3363 ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL); 3364 if (!ndev->vqs || !ndev->event_cbs) { 3365 err = -ENOMEM; 3366 goto err_alloc; 3367 } 3368 3369 init_mvqs(ndev); 3370 allocate_irqs(ndev); 3371 init_rwsem(&ndev->reslock); 3372 config = &ndev->config; 3373 3374 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) { 3375 err = config_func_mtu(mdev, add_config->net.mtu); 3376 if (err) 3377 goto err_alloc; 3378 } 3379 3380 if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) { 3381 err = query_mtu(mdev, &mtu); 3382 if (err) 3383 goto err_alloc; 3384 3385 ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu); 3386 } 3387 3388 if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) { 3389 if (get_link_state(mvdev)) 3390 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); 3391 else 3392 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); 3393 } 3394 3395 if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) { 3396 memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN); 3397 /* No bother setting mac address in config if not going to provision _F_MAC */ 3398 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 || 3399 device_features & BIT_ULL(VIRTIO_NET_F_MAC)) { 3400 err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); 3401 if (err) 3402 goto err_alloc; 3403 } 3404 3405 if (!is_zero_ether_addr(config->mac)) { 3406 pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); 3407 err = mlx5_mpfs_add_mac(pfmdev, config->mac); 3408 if (err) 3409 goto err_alloc; 3410 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) { 3411 /* 3412 * We used to clear _F_MAC feature bit if seeing 3413 * zero mac address when device features are not 3414 * specifically provisioned. Keep the behaviour 3415 * so old scripts do not break. 
3416 */ 3417 device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC); 3418 } else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) { 3419 /* Don't provision zero mac address for _F_MAC */ 3420 mlx5_vdpa_warn(&ndev->mvdev, 3421 "No mac address provisioned?\n"); 3422 err = -EINVAL; 3423 goto err_alloc; 3424 } 3425 3426 if (device_features & BIT_ULL(VIRTIO_NET_F_MQ)) 3427 config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2); 3428 3429 ndev->mvdev.mlx_features = device_features; 3430 mvdev->vdev.dma_dev = &mdev->pdev->dev; 3431 err = mlx5_vdpa_alloc_resources(&ndev->mvdev); 3432 if (err) 3433 goto err_mpfs; 3434 3435 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 3436 err = mlx5_vdpa_create_mr(mvdev, NULL, 0); 3437 if (err) 3438 goto err_res; 3439 } 3440 3441 err = alloc_resources(ndev); 3442 if (err) 3443 goto err_mr; 3444 3445 ndev->cvq_ent.mvdev = mvdev; 3446 INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler); 3447 mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq"); 3448 if (!mvdev->wq) { 3449 err = -ENOMEM; 3450 goto err_res2; 3451 } 3452 3453 mvdev->vdev.mdev = &mgtdev->mgtdev; 3454 err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1); 3455 if (err) 3456 goto err_reg; 3457 3458 mgtdev->ndev = ndev; 3459 return 0; 3460 3461 err_reg: 3462 destroy_workqueue(mvdev->wq); 3463 err_res2: 3464 free_resources(ndev); 3465 err_mr: 3466 mlx5_vdpa_destroy_mr(mvdev); 3467 err_res: 3468 mlx5_vdpa_free_resources(&ndev->mvdev); 3469 err_mpfs: 3470 if (!is_zero_ether_addr(config->mac)) 3471 mlx5_mpfs_del_mac(pfmdev, config->mac); 3472 err_alloc: 3473 put_device(&mvdev->vdev.dev); 3474 return err; 3475 } 3476 3477 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev) 3478 { 3479 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); 3480 struct mlx5_vdpa_dev *mvdev = to_mvdev(dev); 3481 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3482 struct workqueue_struct *wq; 3483 3484 mlx5_vdpa_remove_debugfs(ndev->debugfs); 3485 ndev->debugfs = NULL; 3486 unregister_link_notifier(ndev); 3487 _vdpa_unregister_device(dev); 3488 wq = mvdev->wq; 3489 mvdev->wq = NULL; 3490 destroy_workqueue(wq); 3491 mgtdev->ndev = NULL; 3492 } 3493 3494 static const struct vdpa_mgmtdev_ops mdev_ops = { 3495 .dev_add = mlx5_vdpa_dev_add, 3496 .dev_del = mlx5_vdpa_dev_del, 3497 }; 3498 3499 static struct virtio_device_id id_table[] = { 3500 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 3501 { 0 }, 3502 }; 3503 3504 static int mlx5v_probe(struct auxiliary_device *adev, 3505 const struct auxiliary_device_id *id) 3506 3507 { 3508 struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev); 3509 struct mlx5_core_dev *mdev = madev->mdev; 3510 struct mlx5_vdpa_mgmtdev *mgtdev; 3511 int err; 3512 3513 mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL); 3514 if (!mgtdev) 3515 return -ENOMEM; 3516 3517 mgtdev->mgtdev.ops = &mdev_ops; 3518 mgtdev->mgtdev.device = mdev->device; 3519 mgtdev->mgtdev.id_table = id_table; 3520 mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | 3521 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) | 3522 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) | 3523 BIT_ULL(VDPA_ATTR_DEV_FEATURES); 3524 mgtdev->mgtdev.max_supported_vqs = 3525 MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1; 3526 mgtdev->mgtdev.supported_features = get_supported_features(mdev); 3527 mgtdev->madev = madev; 3528 3529 err = vdpa_mgmtdev_register(&mgtdev->mgtdev); 3530 if (err) 3531 goto reg_err; 3532 3533 auxiliary_set_drvdata(adev, mgtdev); 3534 
        return 0;

reg_err:
        kfree(mgtdev);
        return err;
}

static void mlx5v_remove(struct auxiliary_device *adev)
{
        struct mlx5_vdpa_mgmtdev *mgtdev;

        mgtdev = auxiliary_get_drvdata(adev);
        vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
        kfree(mgtdev);
}

static void mlx5v_shutdown(struct auxiliary_device *auxdev)
{
        struct mlx5_vdpa_mgmtdev *mgtdev;
        struct mlx5_vdpa_net *ndev;

        mgtdev = auxiliary_get_drvdata(auxdev);
        ndev = mgtdev->ndev;

        free_irqs(ndev);
}

static const struct auxiliary_device_id mlx5v_id_table[] = {
        { .name = MLX5_ADEV_NAME ".vnet", },
        {},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);

static struct auxiliary_driver mlx5v_driver = {
        .name = "vnet",
        .probe = mlx5v_probe,
        .remove = mlx5v_remove,
        .shutdown = mlx5v_shutdown,
        .id_table = mlx5v_id_table,
};

module_auxiliary_driver(mlx5v_driver);
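/*
 * Illustration only -- not driver code.  mlx5v_probe() registers one vdpa
 * management device per mlx5 "vnet" auxiliary device, and vdpa net devices
 * are then created and destroyed on demand through mdev_ops.dev_add() and
 * dev_del().  With the iproute2 vdpa tool (assuming a reasonably recent
 * version; the device and management-device names below are only examples),
 * the flow typically looks like:
 *
 *	vdpa mgmtdev show
 *	vdpa dev add name vdpa0 mgmtdev pci/0000:08:00.2 \
 *		mac 52:54:00:12:34:56 max_vqp 4 mtu 1500
 *	vdpa dev del vdpa0
 *
 * The mac/max_vqp/mtu/device-features attributes correspond to the
 * config_attr_mask bits advertised in mlx5v_probe().
 */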