// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/module.h>
#include <linux/vdpa.h>
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/vdpa.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"
#include "mlx5_vnet.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");

#define VALID_FEATURES_MASK \
	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

#define VALID_STATUS_MASK \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)

#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))

#define MLX5V_UNTAGGED 0x1000

struct mlx5_vdpa_cq_buf {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int cqe_size;
	int nent;
};

struct mlx5_vdpa_cq {
	struct mlx5_core_cq mcq;
	struct mlx5_vdpa_cq_buf buf;
	struct mlx5_db db;
	int cqe;
};

struct mlx5_vdpa_umem {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int size;
	u32 id;
};

struct mlx5_vdpa_qp {
	struct mlx5_core_qp mqp;
	struct mlx5_frag_buf frag_buf;
	struct mlx5_db db;
	u16 head;
	bool fw;
};

struct mlx5_vq_restore_info {
	u32 num_ent;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u16 avail_index;
	u16 used_index;
	struct msi_map map;
	bool ready;
	bool restore;
};

struct mlx5_vdpa_virtqueue {
	bool ready;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u32 num_ent;

	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp used by the driver. cq is where completions are
	 * reported.
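	 * A completion on cq indicates the device made progress on the ring;
	 * mlx5_vdpa_cq_comp() below re-posts receive buffers on vqqp and
	 * invokes the vdpa event callback registered for this queue's index.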
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	u32 counter_set_id;
	bool initialized;
	int index;
	u32 virtq_id;
	struct mlx5_vdpa_net *ndev;
	u16 avail_idx;
	u16 used_idx;
	int fw_state;
	struct msi_map map;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};

static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
			return idx < 2;
		else
			return idx < 3;
	}

	return idx <= mvdev->max_idx;
}

static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);

static bool mlx5_vdpa_debug;

#define MLX5_CVQ_MAX_ENT 16

#define MLX5_LOG_VIO_FLAG(_feature) \
	do { \
		if (features & BIT_ULL(_feature)) \
			mlx5_vdpa_info(mvdev, "%s\n", #_feature); \
	} while (0)

#define MLX5_LOG_VIO_STAT(_status) \
	do { \
		if (status & (_status)) \
			mlx5_vdpa_info(mvdev, "%s\n", #_status); \
	} while (0)

/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
	return virtio_legacy_is_little_endian() ||
		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}

static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
		return 2;

	return mvdev->max_vqs;
}

static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	return idx == ctrl_vq_idx(mvdev);
}

static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
	if (status & ~VALID_STATUS_MASK)
		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
			       status & ~VALID_STATUS_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
	if (set && !status) {
		mlx5_vdpa_info(mvdev, "driver resets the device\n");
		return;
	}

	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}

static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
	if (features & ~VALID_FEATURES_MASK)
		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
			       features & ~VALID_FEATURES_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ?
"sets" : "reads"); 224 if (!features) 225 mlx5_vdpa_info(mvdev, "all feature bits are cleared\n"); 226 227 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM); 228 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM); 229 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 230 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU); 231 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC); 232 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4); 233 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6); 234 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN); 235 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO); 236 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4); 237 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6); 238 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN); 239 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO); 240 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF); 241 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS); 242 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ); 243 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX); 244 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN); 245 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA); 246 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE); 247 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ); 248 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR); 249 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT); 250 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS); 251 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT); 252 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY); 253 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX); 254 MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY); 255 MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT); 256 MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1); 257 MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM); 258 MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED); 259 MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM); 260 MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV); 261 } 262 263 static int create_tis(struct mlx5_vdpa_net *ndev) 264 { 265 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 266 u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; 267 void *tisc; 268 int err; 269 270 tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); 271 MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn); 272 err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn); 273 if (err) 274 mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err); 275 276 return err; 277 } 278 279 static void destroy_tis(struct mlx5_vdpa_net *ndev) 280 { 281 mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn); 282 } 283 284 #define MLX5_VDPA_CQE_SIZE 64 285 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE) 286 287 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent) 288 { 289 struct mlx5_frag_buf *frag_buf = &buf->frag_buf; 290 u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE; 291 u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE; 292 int err; 293 294 err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf, 295 ndev->mvdev.mdev->priv.numa_node); 296 if (err) 297 return err; 298 299 mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); 300 301 buf->cqe_size = MLX5_VDPA_CQE_SIZE; 302 buf->nent = nent; 303 304 return 0; 305 } 306 307 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size) 308 { 309 struct mlx5_frag_buf *frag_buf = &umem->frag_buf; 310 311 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf, 312 ndev->mvdev.mdev->priv.numa_node); 313 } 314 315 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf) 316 { 317 mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf); 318 } 319 320 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n) 321 { 322 return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n); 323 } 324 325 static 
void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf) 326 { 327 struct mlx5_cqe64 *cqe64; 328 void *cqe; 329 int i; 330 331 for (i = 0; i < buf->nent; i++) { 332 cqe = get_cqe(vcq, i); 333 cqe64 = cqe; 334 cqe64->op_own = MLX5_CQE_INVALID << 4; 335 } 336 } 337 338 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n) 339 { 340 struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1)); 341 342 if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && 343 !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe))) 344 return cqe64; 345 346 return NULL; 347 } 348 349 static void rx_post(struct mlx5_vdpa_qp *vqp, int n) 350 { 351 vqp->head += n; 352 vqp->db.db[0] = cpu_to_be32(vqp->head); 353 } 354 355 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in, 356 struct mlx5_vdpa_virtqueue *mvq, u32 num_ent) 357 { 358 struct mlx5_vdpa_qp *vqp; 359 __be64 *pas; 360 void *qpc; 361 362 vqp = fw ? &mvq->fwqp : &mvq->vqqp; 363 MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid); 364 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 365 if (vqp->fw) { 366 /* Firmware QP is allocated by the driver for the firmware's 367 * use so we can skip part of the params as they will be chosen by firmware 368 */ 369 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 370 MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ); 371 MLX5_SET(qpc, qpc, no_sq, 1); 372 return; 373 } 374 375 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); 376 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); 377 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); 378 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 379 MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index); 380 MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 381 MLX5_SET(qpc, qpc, no_sq, 1); 382 MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn); 383 MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent)); 384 MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); 385 pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas); 386 mlx5_fill_page_frag_array(&vqp->frag_buf, pas); 387 } 388 389 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent) 390 { 391 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, 392 num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf, 393 ndev->mvdev.mdev->priv.numa_node); 394 } 395 396 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) 397 { 398 mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf); 399 } 400 401 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 402 struct mlx5_vdpa_qp *vqp) 403 { 404 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 405 int inlen = MLX5_ST_SZ_BYTES(create_qp_in); 406 u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; 407 void *qpc; 408 void *in; 409 int err; 410 411 if (!vqp->fw) { 412 vqp = &mvq->vqqp; 413 err = rq_buf_alloc(ndev, vqp, mvq->num_ent); 414 if (err) 415 return err; 416 417 err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db); 418 if (err) 419 goto err_db; 420 inlen += vqp->frag_buf.npages * sizeof(__be64); 421 } 422 423 in = kzalloc(inlen, GFP_KERNEL); 424 if (!in) { 425 err = -ENOMEM; 426 goto err_kzalloc; 427 } 428 429 qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent); 430 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 431 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); 432 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); 433 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); 434 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 435 if (!vqp->fw) 436 MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma); 437 
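	/* Issue CREATE_QP. For the driver-owned QP the RQ buffer and doorbell
	 * record were prepared above; the firmware QP needs neither.
	 */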
	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (err)
		goto err_kzalloc;

	vqp->mqp.uid = ndev->mvdev.res.uid;
	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

	if (!vqp->fw)
		rx_post(vqp, mvq->num_ent);

	return 0;

err_kzalloc:
	if (!vqp->fw)
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
	if (!vqp->fw)
		rq_buf_free(ndev, vqp);

	return err;
}

static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
	if (!vqp->fw) {
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
		rq_buf_free(ndev, vqp);
	}
}

static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	struct mlx5_cqe64 *cqe64;

	cqe64 = next_cqe_sw(vcq);
	if (!cqe64)
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	struct vdpa_callback *event_cb;

	event_cb = &ndev->event_cbs[mvq->index];
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* make sure the CQ consumer update is visible to the hardware before
	 * updating the RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (event_cb->callback)
		event_cb->callback(event_cb->private);
}

static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record. We also let the vdpa
			 * core know about this so it passes it on to the
			 * virtio driver in the guest.
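			 * The CQ itself is re-armed only once, after this
			 * polling loop completes.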
525 */ 526 mlx5_vdpa_handle_completions(mvq, num); 527 num = 0; 528 } 529 } 530 531 if (num) 532 mlx5_vdpa_handle_completions(mvq, num); 533 534 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); 535 } 536 537 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) 538 { 539 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; 540 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 541 void __iomem *uar_page = ndev->mvdev.res.uar->map; 542 u32 out[MLX5_ST_SZ_DW(create_cq_out)]; 543 struct mlx5_vdpa_cq *vcq = &mvq->cq; 544 __be64 *pas; 545 int inlen; 546 void *cqc; 547 void *in; 548 int err; 549 int eqn; 550 551 err = mlx5_db_alloc(mdev, &vcq->db); 552 if (err) 553 return err; 554 555 vcq->mcq.set_ci_db = vcq->db.db; 556 vcq->mcq.arm_db = vcq->db.db + 1; 557 vcq->mcq.cqe_sz = 64; 558 559 err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent); 560 if (err) 561 goto err_db; 562 563 cq_frag_buf_init(vcq, &vcq->buf); 564 565 inlen = MLX5_ST_SZ_BYTES(create_cq_in) + 566 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages; 567 in = kzalloc(inlen, GFP_KERNEL); 568 if (!in) { 569 err = -ENOMEM; 570 goto err_vzalloc; 571 } 572 573 MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid); 574 pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); 575 mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas); 576 577 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 578 MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 579 580 /* Use vector 0 by default. Consider adding code to choose least used 581 * vector. 582 */ 583 err = mlx5_comp_eqn_get(mdev, 0, &eqn); 584 if (err) 585 goto err_vec; 586 587 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 588 MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent)); 589 MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index); 590 MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); 591 MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma); 592 593 err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out)); 594 if (err) 595 goto err_vec; 596 597 vcq->mcq.comp = mlx5_vdpa_cq_comp; 598 vcq->cqe = num_ent; 599 vcq->mcq.set_ci_db = vcq->db.db; 600 vcq->mcq.arm_db = vcq->db.db + 1; 601 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); 602 kfree(in); 603 return 0; 604 605 err_vec: 606 kfree(in); 607 err_vzalloc: 608 cq_frag_buf_free(ndev, &vcq->buf); 609 err_db: 610 mlx5_db_free(ndev->mvdev.mdev, &vcq->db); 611 return err; 612 } 613 614 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx) 615 { 616 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; 617 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 618 struct mlx5_vdpa_cq *vcq = &mvq->cq; 619 620 if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) { 621 mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn); 622 return; 623 } 624 cq_frag_buf_free(ndev, &vcq->buf); 625 mlx5_db_free(ndev->mvdev.mdev, &vcq->db); 626 } 627 628 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num, 629 struct mlx5_vdpa_umem **umemp) 630 { 631 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 632 int p_a; 633 int p_b; 634 635 switch (num) { 636 case 1: 637 p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a); 638 p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b); 639 *umemp = &mvq->umem1; 640 break; 641 case 2: 642 p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a); 643 p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b); 644 *umemp = &mvq->umem2; 645 
break; 646 case 3: 647 p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a); 648 p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b); 649 *umemp = &mvq->umem3; 650 break; 651 } 652 (*umemp)->size = p_a * mvq->num_ent + p_b; 653 } 654 655 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem) 656 { 657 mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf); 658 } 659 660 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) 661 { 662 int inlen; 663 u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {}; 664 void *um; 665 void *in; 666 int err; 667 __be64 *pas; 668 struct mlx5_vdpa_umem *umem; 669 670 set_umem_size(ndev, mvq, num, &umem); 671 err = umem_frag_buf_alloc(ndev, umem, umem->size); 672 if (err) 673 return err; 674 675 inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages; 676 677 in = kzalloc(inlen, GFP_KERNEL); 678 if (!in) { 679 err = -ENOMEM; 680 goto err_in; 681 } 682 683 MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM); 684 MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid); 685 um = MLX5_ADDR_OF(create_umem_in, in, umem); 686 MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 687 MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages); 688 689 pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]); 690 mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW); 691 692 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 693 if (err) { 694 mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err); 695 goto err_cmd; 696 } 697 698 kfree(in); 699 umem->id = MLX5_GET(create_umem_out, out, umem_id); 700 701 return 0; 702 703 err_cmd: 704 kfree(in); 705 err_in: 706 umem_frag_buf_free(ndev, umem); 707 return err; 708 } 709 710 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) 711 { 712 u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {}; 713 u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {}; 714 struct mlx5_vdpa_umem *umem; 715 716 switch (num) { 717 case 1: 718 umem = &mvq->umem1; 719 break; 720 case 2: 721 umem = &mvq->umem2; 722 break; 723 case 3: 724 umem = &mvq->umem3; 725 break; 726 } 727 728 MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM); 729 MLX5_SET(destroy_umem_in, in, umem_id, umem->id); 730 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) 731 return; 732 733 umem_frag_buf_free(ndev, umem); 734 } 735 736 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 737 { 738 int num; 739 int err; 740 741 for (num = 1; num <= 3; num++) { 742 err = create_umem(ndev, mvq, num); 743 if (err) 744 goto err_umem; 745 } 746 return 0; 747 748 err_umem: 749 for (num--; num > 0; num--) 750 umem_destroy(ndev, mvq, num); 751 752 return err; 753 } 754 755 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 756 { 757 int num; 758 759 for (num = 3; num > 0; num--) 760 umem_destroy(ndev, mvq, num); 761 } 762 763 static int get_queue_type(struct mlx5_vdpa_net *ndev) 764 { 765 u32 type_mask; 766 767 type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type); 768 769 /* prefer split queue */ 770 if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT) 771 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT; 772 773 WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED)); 774 775 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED; 
776 } 777 778 static bool vq_is_tx(u16 idx) 779 { 780 return idx % 2; 781 } 782 783 enum { 784 MLX5_VIRTIO_NET_F_MRG_RXBUF = 2, 785 MLX5_VIRTIO_NET_F_HOST_ECN = 4, 786 MLX5_VIRTIO_NET_F_GUEST_ECN = 6, 787 MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7, 788 MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8, 789 MLX5_VIRTIO_NET_F_GUEST_CSUM = 9, 790 MLX5_VIRTIO_NET_F_CSUM = 10, 791 MLX5_VIRTIO_NET_F_HOST_TSO6 = 11, 792 MLX5_VIRTIO_NET_F_HOST_TSO4 = 12, 793 }; 794 795 static u16 get_features(u64 features) 796 { 797 return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) | 798 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) | 799 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) | 800 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) | 801 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) | 802 (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) | 803 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) | 804 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4); 805 } 806 807 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev) 808 { 809 return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) & 810 BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 811 } 812 813 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev) 814 { 815 return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) & 816 (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) && 817 pci_msix_can_alloc_dyn(mvdev->mdev->pdev); 818 } 819 820 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 821 { 822 int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); 823 u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {}; 824 void *obj_context; 825 u16 mlx_features; 826 void *cmd_hdr; 827 void *vq_ctx; 828 void *in; 829 int err; 830 831 err = umems_create(ndev, mvq); 832 if (err) 833 return err; 834 835 in = kzalloc(inlen, GFP_KERNEL); 836 if (!in) { 837 err = -ENOMEM; 838 goto err_alloc; 839 } 840 841 mlx_features = get_features(ndev->mvdev.actual_features); 842 cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr); 843 844 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 845 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 846 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 847 848 obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context); 849 MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); 850 MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); 851 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, 852 mlx_features >> 3); 853 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0, 854 mlx_features & 7); 855 vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); 856 MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev)); 857 858 if (vq_is_tx(mvq->index)) 859 MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn); 860 861 if (mvq->map.virq) { 862 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE); 863 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index); 864 } else { 865 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE); 866 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); 867 } 868 869 
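	/* When a dynamically allocated MSI-X vector is available, the queue
	 * signals completions directly through it; otherwise completions are
	 * funneled through the firmware QP / CQ channel set up earlier.
	 */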
MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index); 870 MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent); 871 MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 872 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1))); 873 MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); 874 MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); 875 MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); 876 MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey); 877 MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id); 878 MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size); 879 MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id); 880 MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size); 881 MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id); 882 MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size); 883 MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); 884 if (counters_supported(&ndev->mvdev)) 885 MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id); 886 887 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 888 if (err) 889 goto err_cmd; 890 891 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT; 892 kfree(in); 893 mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 894 895 return 0; 896 897 err_cmd: 898 kfree(in); 899 err_alloc: 900 umems_destroy(ndev, mvq); 901 return err; 902 } 903 904 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 905 { 906 u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {}; 907 u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {}; 908 909 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode, 910 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 911 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id); 912 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid); 913 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type, 914 MLX5_OBJ_TYPE_VIRTIO_NET_Q); 915 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) { 916 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id); 917 return; 918 } 919 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; 920 umems_destroy(ndev, mvq); 921 } 922 923 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) 924 { 925 return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn; 926 } 927 928 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) 929 { 930 return fw ? 
mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn; 931 } 932 933 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out, 934 int *outlen, u32 qpn, u32 rqpn) 935 { 936 void *qpc; 937 void *pp; 938 939 switch (cmd) { 940 case MLX5_CMD_OP_2RST_QP: 941 *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in); 942 *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out); 943 *in = kzalloc(*inlen, GFP_KERNEL); 944 *out = kzalloc(*outlen, GFP_KERNEL); 945 if (!*in || !*out) 946 goto outerr; 947 948 MLX5_SET(qp_2rst_in, *in, opcode, cmd); 949 MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid); 950 MLX5_SET(qp_2rst_in, *in, qpn, qpn); 951 break; 952 case MLX5_CMD_OP_RST2INIT_QP: 953 *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in); 954 *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out); 955 *in = kzalloc(*inlen, GFP_KERNEL); 956 *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL); 957 if (!*in || !*out) 958 goto outerr; 959 960 MLX5_SET(rst2init_qp_in, *in, opcode, cmd); 961 MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid); 962 MLX5_SET(rst2init_qp_in, *in, qpn, qpn); 963 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 964 MLX5_SET(qpc, qpc, remote_qpn, rqpn); 965 MLX5_SET(qpc, qpc, rwe, 1); 966 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 967 MLX5_SET(ads, pp, vhca_port_num, 1); 968 break; 969 case MLX5_CMD_OP_INIT2RTR_QP: 970 *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in); 971 *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out); 972 *in = kzalloc(*inlen, GFP_KERNEL); 973 *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL); 974 if (!*in || !*out) 975 goto outerr; 976 977 MLX5_SET(init2rtr_qp_in, *in, opcode, cmd); 978 MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid); 979 MLX5_SET(init2rtr_qp_in, *in, qpn, qpn); 980 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 981 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 982 MLX5_SET(qpc, qpc, log_msg_max, 30); 983 MLX5_SET(qpc, qpc, remote_qpn, rqpn); 984 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 985 MLX5_SET(ads, pp, fl, 1); 986 break; 987 case MLX5_CMD_OP_RTR2RTS_QP: 988 *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in); 989 *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out); 990 *in = kzalloc(*inlen, GFP_KERNEL); 991 *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL); 992 if (!*in || !*out) 993 goto outerr; 994 995 MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd); 996 MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid); 997 MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn); 998 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 999 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1000 MLX5_SET(ads, pp, ack_timeout, 14); 1001 MLX5_SET(qpc, qpc, retry_count, 7); 1002 MLX5_SET(qpc, qpc, rnr_retry, 7); 1003 break; 1004 default: 1005 goto outerr_nullify; 1006 } 1007 1008 return; 1009 1010 outerr: 1011 kfree(*in); 1012 kfree(*out); 1013 outerr_nullify: 1014 *in = NULL; 1015 *out = NULL; 1016 } 1017 1018 static void free_inout(void *in, void *out) 1019 { 1020 kfree(in); 1021 kfree(out); 1022 } 1023 1024 /* Two QPs are used by each virtqueue. One is used by the driver and one by 1025 * firmware. The fw argument indicates whether the subjected QP is the one used 1026 * by firmware. 
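 * connect_qps() below drives both QPs through RESET -> INIT -> RTR (and the
 * firmware QP on to RTS), with each side's remote_qpn pointing at its peer.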
1027 */ 1028 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd) 1029 { 1030 int outlen; 1031 int inlen; 1032 void *out; 1033 void *in; 1034 int err; 1035 1036 alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw)); 1037 if (!in || !out) 1038 return -ENOMEM; 1039 1040 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen); 1041 free_inout(in, out); 1042 return err; 1043 } 1044 1045 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1046 { 1047 int err; 1048 1049 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP); 1050 if (err) 1051 return err; 1052 1053 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP); 1054 if (err) 1055 return err; 1056 1057 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP); 1058 if (err) 1059 return err; 1060 1061 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP); 1062 if (err) 1063 return err; 1064 1065 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP); 1066 if (err) 1067 return err; 1068 1069 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP); 1070 if (err) 1071 return err; 1072 1073 return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP); 1074 } 1075 1076 struct mlx5_virtq_attr { 1077 u8 state; 1078 u16 available_index; 1079 u16 used_index; 1080 }; 1081 1082 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 1083 struct mlx5_virtq_attr *attr) 1084 { 1085 int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out); 1086 u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {}; 1087 void *out; 1088 void *obj_context; 1089 void *cmd_hdr; 1090 int err; 1091 1092 out = kzalloc(outlen, GFP_KERNEL); 1093 if (!out) 1094 return -ENOMEM; 1095 1096 cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr); 1097 1098 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 1099 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 1100 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); 1101 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1102 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen); 1103 if (err) 1104 goto err_cmd; 1105 1106 obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context); 1107 memset(attr, 0, sizeof(*attr)); 1108 attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); 1109 attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); 1110 attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index); 1111 kfree(out); 1112 return 0; 1113 1114 err_cmd: 1115 kfree(out); 1116 return err; 1117 } 1118 1119 static bool is_valid_state_change(int oldstate, int newstate) 1120 { 1121 switch (oldstate) { 1122 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT: 1123 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY; 1124 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY: 1125 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; 1126 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: 1127 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR: 1128 default: 1129 return false; 1130 } 1131 } 1132 1133 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state) 1134 { 1135 int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); 1136 u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {}; 1137 void *obj_context; 1138 void *cmd_hdr; 1139 void *in; 1140 int err; 1141 1142 if (mvq->fw_state == 
MLX5_VIRTIO_NET_Q_OBJECT_NONE) 1143 return 0; 1144 1145 if (!is_valid_state_change(mvq->fw_state, state)) 1146 return -EINVAL; 1147 1148 in = kzalloc(inlen, GFP_KERNEL); 1149 if (!in) 1150 return -ENOMEM; 1151 1152 cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr); 1153 1154 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); 1155 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 1156 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); 1157 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1158 1159 obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context); 1160 MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, 1161 MLX5_VIRTQ_MODIFY_MASK_STATE); 1162 MLX5_SET(virtio_net_q_object, obj_context, state, state); 1163 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 1164 kfree(in); 1165 if (!err) 1166 mvq->fw_state = state; 1167 1168 return err; 1169 } 1170 1171 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1172 { 1173 u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {}; 1174 u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {}; 1175 void *cmd_hdr; 1176 int err; 1177 1178 if (!counters_supported(&ndev->mvdev)) 1179 return 0; 1180 1181 cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr); 1182 1183 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 1184 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 1185 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1186 1187 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 1188 if (err) 1189 return err; 1190 1191 mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 1192 1193 return 0; 1194 } 1195 1196 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1197 { 1198 u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {}; 1199 u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {}; 1200 1201 if (!counters_supported(&ndev->mvdev)) 1202 return; 1203 1204 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 1205 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id); 1206 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid); 1207 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 1208 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) 1209 mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id); 1210 } 1211 1212 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv) 1213 { 1214 struct vdpa_callback *cb = priv; 1215 1216 if (cb->callback) 1217 return cb->callback(cb->private); 1218 1219 return IRQ_HANDLED; 1220 } 1221 1222 static void alloc_vector(struct mlx5_vdpa_net *ndev, 1223 struct mlx5_vdpa_virtqueue *mvq) 1224 { 1225 struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; 1226 struct mlx5_vdpa_irq_pool_entry *ent; 1227 int err; 1228 int i; 1229 1230 for (i = 0; i < irqp->num_ent; i++) { 1231 ent = &irqp->entries[i]; 1232 if (!ent->used) { 1233 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", 1234 dev_name(&ndev->mvdev.vdev.dev), mvq->index); 1235 ent->dev_id = &ndev->event_cbs[mvq->index]; 1236 err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0, 1237 ent->name, ent->dev_id); 1238 if 
(err) 1239 return; 1240 1241 ent->used = true; 1242 mvq->map = ent->map; 1243 return; 1244 } 1245 } 1246 } 1247 1248 static void dealloc_vector(struct mlx5_vdpa_net *ndev, 1249 struct mlx5_vdpa_virtqueue *mvq) 1250 { 1251 struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; 1252 int i; 1253 1254 for (i = 0; i < irqp->num_ent; i++) 1255 if (mvq->map.virq == irqp->entries[i].map.virq) { 1256 free_irq(mvq->map.virq, irqp->entries[i].dev_id); 1257 irqp->entries[i].used = false; 1258 return; 1259 } 1260 } 1261 1262 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1263 { 1264 u16 idx = mvq->index; 1265 int err; 1266 1267 if (!mvq->num_ent) 1268 return 0; 1269 1270 if (mvq->initialized) 1271 return 0; 1272 1273 err = cq_create(ndev, idx, mvq->num_ent); 1274 if (err) 1275 return err; 1276 1277 err = qp_create(ndev, mvq, &mvq->fwqp); 1278 if (err) 1279 goto err_fwqp; 1280 1281 err = qp_create(ndev, mvq, &mvq->vqqp); 1282 if (err) 1283 goto err_vqqp; 1284 1285 err = connect_qps(ndev, mvq); 1286 if (err) 1287 goto err_connect; 1288 1289 err = counter_set_alloc(ndev, mvq); 1290 if (err) 1291 goto err_connect; 1292 1293 alloc_vector(ndev, mvq); 1294 err = create_virtqueue(ndev, mvq); 1295 if (err) 1296 goto err_vq; 1297 1298 if (mvq->ready) { 1299 err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); 1300 if (err) { 1301 mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n", 1302 idx, err); 1303 goto err_modify; 1304 } 1305 } 1306 1307 mvq->initialized = true; 1308 return 0; 1309 1310 err_modify: 1311 destroy_virtqueue(ndev, mvq); 1312 err_vq: 1313 dealloc_vector(ndev, mvq); 1314 counter_set_dealloc(ndev, mvq); 1315 err_connect: 1316 qp_destroy(ndev, &mvq->vqqp); 1317 err_vqqp: 1318 qp_destroy(ndev, &mvq->fwqp); 1319 err_fwqp: 1320 cq_destroy(ndev, idx); 1321 return err; 1322 } 1323 1324 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1325 { 1326 struct mlx5_virtq_attr attr; 1327 1328 if (!mvq->initialized) 1329 return; 1330 1331 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 1332 return; 1333 1334 if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) 1335 mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n"); 1336 1337 if (query_virtqueue(ndev, mvq, &attr)) { 1338 mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n"); 1339 return; 1340 } 1341 mvq->avail_idx = attr.available_index; 1342 mvq->used_idx = attr.used_index; 1343 } 1344 1345 static void suspend_vqs(struct mlx5_vdpa_net *ndev) 1346 { 1347 int i; 1348 1349 for (i = 0; i < ndev->mvdev.max_vqs; i++) 1350 suspend_vq(ndev, &ndev->vqs[i]); 1351 } 1352 1353 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1354 { 1355 if (!mvq->initialized) 1356 return; 1357 1358 suspend_vq(ndev, mvq); 1359 destroy_virtqueue(ndev, mvq); 1360 dealloc_vector(ndev, mvq); 1361 counter_set_dealloc(ndev, mvq); 1362 qp_destroy(ndev, &mvq->vqqp); 1363 qp_destroy(ndev, &mvq->fwqp); 1364 cq_destroy(ndev, mvq->index); 1365 mvq->initialized = false; 1366 } 1367 1368 static int create_rqt(struct mlx5_vdpa_net *ndev) 1369 { 1370 int rqt_table_size = roundup_pow_of_two(ndev->rqt_size); 1371 int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2); 1372 __be32 *list; 1373 void *rqtc; 1374 int inlen; 1375 void *in; 1376 int i, j; 1377 int err; 1378 1379 inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num); 1380 in = kzalloc(inlen, GFP_KERNEL); 1381 if (!in) 1382 return -ENOMEM; 1383 
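	/* Populate the RQT with the receive queue object IDs; even-numbered
	 * virtqueues are RX, hence the stride of two below.
	 */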
1384 MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid); 1385 rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); 1386 1387 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); 1388 MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size); 1389 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); 1390 for (i = 0, j = 0; i < act_sz; i++, j += 2) 1391 list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id); 1392 1393 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz); 1394 err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn); 1395 kfree(in); 1396 if (err) 1397 return err; 1398 1399 return 0; 1400 } 1401 1402 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1) 1403 1404 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num) 1405 { 1406 int act_sz = roundup_pow_of_two(num / 2); 1407 __be32 *list; 1408 void *rqtc; 1409 int inlen; 1410 void *in; 1411 int i, j; 1412 int err; 1413 1414 inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num); 1415 in = kzalloc(inlen, GFP_KERNEL); 1416 if (!in) 1417 return -ENOMEM; 1418 1419 MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid); 1420 MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS); 1421 rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); 1422 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); 1423 1424 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); 1425 for (i = 0, j = 0; i < act_sz; i++, j = j + 2) 1426 list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id); 1427 1428 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz); 1429 err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn); 1430 kfree(in); 1431 if (err) 1432 return err; 1433 1434 return 0; 1435 } 1436 1437 static void destroy_rqt(struct mlx5_vdpa_net *ndev) 1438 { 1439 mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn); 1440 } 1441 1442 static int create_tir(struct mlx5_vdpa_net *ndev) 1443 { 1444 #define HASH_IP_L4PORTS \ 1445 (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \ 1446 MLX5_HASH_FIELD_SEL_L4_DPORT) 1447 static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7, 1448 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94, 1449 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1, 1450 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59, 1451 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a }; 1452 void *rss_key; 1453 void *outer; 1454 void *tirc; 1455 void *in; 1456 int err; 1457 1458 in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL); 1459 if (!in) 1460 return -ENOMEM; 1461 1462 MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid); 1463 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); 1464 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); 1465 1466 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); 1467 MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); 1468 rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); 1469 memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key)); 1470 1471 outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); 1472 MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); 1473 MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); 1474 MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS); 1475 1476 MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn); 1477 MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn); 1478 1479 err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn); 1480 kfree(in); 1481 if (err) 1482 return err; 1483 
1484 mlx5_vdpa_add_tirn(ndev); 1485 return err; 1486 } 1487 1488 static void destroy_tir(struct mlx5_vdpa_net *ndev) 1489 { 1490 mlx5_vdpa_remove_tirn(ndev); 1491 mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn); 1492 } 1493 1494 #define MAX_STEERING_ENT 0x8000 1495 #define MAX_STEERING_GROUPS 2 1496 1497 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1498 #define NUM_DESTS 2 1499 #else 1500 #define NUM_DESTS 1 1501 #endif 1502 1503 static int add_steering_counters(struct mlx5_vdpa_net *ndev, 1504 struct macvlan_node *node, 1505 struct mlx5_flow_act *flow_act, 1506 struct mlx5_flow_destination *dests) 1507 { 1508 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1509 int err; 1510 1511 node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false); 1512 if (IS_ERR(node->ucast_counter.counter)) 1513 return PTR_ERR(node->ucast_counter.counter); 1514 1515 node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false); 1516 if (IS_ERR(node->mcast_counter.counter)) { 1517 err = PTR_ERR(node->mcast_counter.counter); 1518 goto err_mcast_counter; 1519 } 1520 1521 dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; 1522 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 1523 return 0; 1524 1525 err_mcast_counter: 1526 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter); 1527 return err; 1528 #else 1529 return 0; 1530 #endif 1531 } 1532 1533 static void remove_steering_counters(struct mlx5_vdpa_net *ndev, 1534 struct macvlan_node *node) 1535 { 1536 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1537 mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter); 1538 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter); 1539 #endif 1540 } 1541 1542 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, 1543 struct macvlan_node *node) 1544 { 1545 struct mlx5_flow_destination dests[NUM_DESTS] = {}; 1546 struct mlx5_flow_act flow_act = {}; 1547 struct mlx5_flow_spec *spec; 1548 void *headers_c; 1549 void *headers_v; 1550 u8 *dmac_c; 1551 u8 *dmac_v; 1552 int err; 1553 u16 vid; 1554 1555 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 1556 if (!spec) 1557 return -ENOMEM; 1558 1559 vid = key2vid(node->macvlan); 1560 spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; 1561 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); 1562 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); 1563 dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); 1564 dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); 1565 eth_broadcast_addr(dmac_c); 1566 ether_addr_copy(dmac_v, mac); 1567 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) { 1568 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); 1569 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); 1570 } 1571 if (node->tagged) { 1572 MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); 1573 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid); 1574 } 1575 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 1576 dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; 1577 dests[0].tir_num = ndev->res.tirn; 1578 err = add_steering_counters(ndev, node, &flow_act, dests); 1579 if (err) 1580 goto out_free; 1581 1582 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1583 dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter); 1584 #endif 1585 node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); 1586 if (IS_ERR(node->ucast_rule)) { 1587 err = 
PTR_ERR(node->ucast_rule); 1588 goto err_ucast; 1589 } 1590 1591 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1592 dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter); 1593 #endif 1594 1595 memset(dmac_c, 0, ETH_ALEN); 1596 memset(dmac_v, 0, ETH_ALEN); 1597 dmac_c[0] = 1; 1598 dmac_v[0] = 1; 1599 node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); 1600 if (IS_ERR(node->mcast_rule)) { 1601 err = PTR_ERR(node->mcast_rule); 1602 goto err_mcast; 1603 } 1604 kvfree(spec); 1605 mlx5_vdpa_add_rx_counters(ndev, node); 1606 return 0; 1607 1608 err_mcast: 1609 mlx5_del_flow_rules(node->ucast_rule); 1610 err_ucast: 1611 remove_steering_counters(ndev, node); 1612 out_free: 1613 kvfree(spec); 1614 return err; 1615 } 1616 1617 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev, 1618 struct macvlan_node *node) 1619 { 1620 mlx5_vdpa_remove_rx_counters(ndev, node); 1621 mlx5_del_flow_rules(node->ucast_rule); 1622 mlx5_del_flow_rules(node->mcast_rule); 1623 } 1624 1625 static u64 search_val(u8 *mac, u16 vlan, bool tagged) 1626 { 1627 u64 val; 1628 1629 if (!tagged) 1630 vlan = MLX5V_UNTAGGED; 1631 1632 val = (u64)vlan << 48 | 1633 (u64)mac[0] << 40 | 1634 (u64)mac[1] << 32 | 1635 (u64)mac[2] << 24 | 1636 (u64)mac[3] << 16 | 1637 (u64)mac[4] << 8 | 1638 (u64)mac[5]; 1639 1640 return val; 1641 } 1642 1643 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value) 1644 { 1645 struct macvlan_node *pos; 1646 u32 idx; 1647 1648 idx = hash_64(value, 8); // tbd 8 1649 hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) { 1650 if (pos->macvlan == value) 1651 return pos; 1652 } 1653 return NULL; 1654 } 1655 1656 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged) 1657 { 1658 struct macvlan_node *ptr; 1659 u64 val; 1660 u32 idx; 1661 int err; 1662 1663 val = search_val(mac, vid, tagged); 1664 if (mac_vlan_lookup(ndev, val)) 1665 return -EEXIST; 1666 1667 ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); 1668 if (!ptr) 1669 return -ENOMEM; 1670 1671 ptr->tagged = tagged; 1672 ptr->macvlan = val; 1673 ptr->ndev = ndev; 1674 err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr); 1675 if (err) 1676 goto err_add; 1677 1678 idx = hash_64(val, 8); 1679 hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]); 1680 return 0; 1681 1682 err_add: 1683 kfree(ptr); 1684 return err; 1685 } 1686 1687 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) 1688 { 1689 struct macvlan_node *ptr; 1690 1691 ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged)); 1692 if (!ptr) 1693 return; 1694 1695 hlist_del(&ptr->hlist); 1696 mlx5_vdpa_del_mac_vlan_rules(ndev, ptr); 1697 remove_steering_counters(ndev, ptr); 1698 kfree(ptr); 1699 } 1700 1701 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev) 1702 { 1703 struct macvlan_node *pos; 1704 struct hlist_node *n; 1705 int i; 1706 1707 for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) { 1708 hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) { 1709 hlist_del(&pos->hlist); 1710 mlx5_vdpa_del_mac_vlan_rules(ndev, pos); 1711 remove_steering_counters(ndev, pos); 1712 kfree(pos); 1713 } 1714 } 1715 } 1716 1717 static int setup_steering(struct mlx5_vdpa_net *ndev) 1718 { 1719 struct mlx5_flow_table_attr ft_attr = {}; 1720 struct mlx5_flow_namespace *ns; 1721 int err; 1722 1723 ft_attr.max_fte = MAX_STEERING_ENT; 1724 ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS; 1725 1726 ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, 
				      MLX5_FLOW_NAMESPACE_BYPASS);
	if (!ns) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
		return -EOPNOTSUPP;
	}

	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(ndev->rxft)) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
		return PTR_ERR(ndev->rxft);
	}
	mlx5_vdpa_add_rx_flow_table(ndev);

	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
	if (err)
		goto err_add;

	return 0;

err_add:
	mlx5_vdpa_remove_rx_flow_table(ndev);
	mlx5_destroy_flow_table(ndev->rxft);
	return err;
}

static void teardown_steering(struct mlx5_vdpa_net *ndev)
{
	clear_mac_vlan_table(ndev);
	mlx5_vdpa_remove_rx_flow_table(ndev);
	mlx5_destroy_flow_table(ndev->rxft);
}

static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct mlx5_core_dev *pfmdev;
	size_t read;
	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];

	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
	switch (cmd) {
	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
		if (read != ETH_ALEN)
			break;

		if (!memcmp(ndev->config.mac, mac, 6)) {
			status = VIRTIO_NET_OK;
			break;
		}

		if (is_zero_ether_addr(mac))
			break;

		if (!is_zero_ether_addr(ndev->config.mac)) {
			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
					       ndev->config.mac);
				break;
			}
		}

		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
				       mac);
			break;
		}

		/* Back up the original MAC address so that we can restore it
		 * if adding the forwarding rules fails.
		 */
		memcpy(mac_back, ndev->config.mac, ETH_ALEN);

		memcpy(ndev->config.mac, mac, ETH_ALEN);

		/* The flow table entry needs to be recreated so that traffic
		 * destined to the new MAC is forwarded back to us.
		 */
		mac_vlan_del(ndev, mac_back, 0, false);

		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");

			/* Although this is unlikely to happen, we still need to double check */
			if (is_zero_ether_addr(mac_back)) {
				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
				break;
			}

			/* Try to restore the original MAC address in the MPFS table, and
			 * try to restore the forwarding rule entry.
1818 */ 1819 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) { 1820 mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n", 1821 ndev->config.mac); 1822 } 1823 1824 if (mlx5_mpfs_add_mac(pfmdev, mac_back)) { 1825 mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n", 1826 mac_back); 1827 } 1828 1829 memcpy(ndev->config.mac, mac_back, ETH_ALEN); 1830 1831 if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) 1832 mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n"); 1833 1834 break; 1835 } 1836 1837 status = VIRTIO_NET_OK; 1838 break; 1839 1840 default: 1841 break; 1842 } 1843 1844 return status; 1845 } 1846 1847 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps) 1848 { 1849 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1850 int cur_qps = ndev->cur_num_vqs / 2; 1851 int err; 1852 int i; 1853 1854 if (cur_qps > newqps) { 1855 err = modify_rqt(ndev, 2 * newqps); 1856 if (err) 1857 return err; 1858 1859 for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--) 1860 teardown_vq(ndev, &ndev->vqs[i]); 1861 1862 ndev->cur_num_vqs = 2 * newqps; 1863 } else { 1864 ndev->cur_num_vqs = 2 * newqps; 1865 for (i = cur_qps * 2; i < 2 * newqps; i++) { 1866 err = setup_vq(ndev, &ndev->vqs[i]); 1867 if (err) 1868 goto clean_added; 1869 } 1870 err = modify_rqt(ndev, 2 * newqps); 1871 if (err) 1872 goto clean_added; 1873 } 1874 return 0; 1875 1876 clean_added: 1877 for (--i; i >= 2 * cur_qps; --i) 1878 teardown_vq(ndev, &ndev->vqs[i]); 1879 1880 ndev->cur_num_vqs = 2 * cur_qps; 1881 1882 return err; 1883 } 1884 1885 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd) 1886 { 1887 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1888 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1889 struct mlx5_control_vq *cvq = &mvdev->cvq; 1890 struct virtio_net_ctrl_mq mq; 1891 size_t read; 1892 u16 newqps; 1893 1894 switch (cmd) { 1895 case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: 1896 /* This mq feature check aligns with pre-existing userspace 1897 * implementation. 1898 * 1899 * Without it, an untrusted driver could fake a multiqueue config 1900 * request down to a non-mq device that may cause kernel to 1901 * panic due to uninitialized resources for extra vqs. Even with 1902 * a well behaving guest driver, it is not expected to allow 1903 * changing the number of vqs on a non-mq device. 
1904 */ 1905 if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) 1906 break; 1907 1908 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq)); 1909 if (read != sizeof(mq)) 1910 break; 1911 1912 newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs); 1913 if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 1914 newqps > ndev->rqt_size) 1915 break; 1916 1917 if (ndev->cur_num_vqs == 2 * newqps) { 1918 status = VIRTIO_NET_OK; 1919 break; 1920 } 1921 1922 if (!change_num_qps(mvdev, newqps)) 1923 status = VIRTIO_NET_OK; 1924 1925 break; 1926 default: 1927 break; 1928 } 1929 1930 return status; 1931 } 1932 1933 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd) 1934 { 1935 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1936 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1937 struct mlx5_control_vq *cvq = &mvdev->cvq; 1938 __virtio16 vlan; 1939 size_t read; 1940 u16 id; 1941 1942 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN))) 1943 return status; 1944 1945 switch (cmd) { 1946 case VIRTIO_NET_CTRL_VLAN_ADD: 1947 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 1948 if (read != sizeof(vlan)) 1949 break; 1950 1951 id = mlx5vdpa16_to_cpu(mvdev, vlan); 1952 if (mac_vlan_add(ndev, ndev->config.mac, id, true)) 1953 break; 1954 1955 status = VIRTIO_NET_OK; 1956 break; 1957 case VIRTIO_NET_CTRL_VLAN_DEL: 1958 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 1959 if (read != sizeof(vlan)) 1960 break; 1961 1962 id = mlx5vdpa16_to_cpu(mvdev, vlan); 1963 mac_vlan_del(ndev, ndev->config.mac, id, true); 1964 status = VIRTIO_NET_OK; 1965 break; 1966 default: 1967 break; 1968 } 1969 1970 return status; 1971 } 1972 1973 static void mlx5_cvq_kick_handler(struct work_struct *work) 1974 { 1975 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1976 struct virtio_net_ctrl_hdr ctrl; 1977 struct mlx5_vdpa_wq_ent *wqent; 1978 struct mlx5_vdpa_dev *mvdev; 1979 struct mlx5_control_vq *cvq; 1980 struct mlx5_vdpa_net *ndev; 1981 size_t read, write; 1982 int err; 1983 1984 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 1985 mvdev = wqent->mvdev; 1986 ndev = to_mlx5_vdpa_ndev(mvdev); 1987 cvq = &mvdev->cvq; 1988 1989 down_write(&ndev->reslock); 1990 1991 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 1992 goto out; 1993 1994 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 1995 goto out; 1996 1997 if (!cvq->ready) 1998 goto out; 1999 2000 while (true) { 2001 err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head, 2002 GFP_ATOMIC); 2003 if (err <= 0) 2004 break; 2005 2006 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl)); 2007 if (read != sizeof(ctrl)) 2008 break; 2009 2010 cvq->received_desc++; 2011 switch (ctrl.class) { 2012 case VIRTIO_NET_CTRL_MAC: 2013 status = handle_ctrl_mac(mvdev, ctrl.cmd); 2014 break; 2015 case VIRTIO_NET_CTRL_MQ: 2016 status = handle_ctrl_mq(mvdev, ctrl.cmd); 2017 break; 2018 case VIRTIO_NET_CTRL_VLAN: 2019 status = handle_ctrl_vlan(mvdev, ctrl.cmd); 2020 break; 2021 default: 2022 break; 2023 } 2024 2025 /* Make sure data is written before advancing index */ 2026 smp_wmb(); 2027 2028 write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status)); 2029 vringh_complete_iotlb(&cvq->vring, cvq->head, write); 2030 vringh_kiov_cleanup(&cvq->riov); 2031 vringh_kiov_cleanup(&cvq->wiov); 2032 2033 if (vringh_need_notify_iotlb(&cvq->vring)) 2034 vringh_notify(&cvq->vring); 2035 2036 
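/* Handle one control command per work invocation: re-queue the work item
 * and break out of the loop below so that any remaining commands are
 * picked up in a later pass instead of being drained in one long run
 * while reslock is held.
 */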
cvq->completed_desc++; 2037 queue_work(mvdev->wq, &wqent->work); 2038 break; 2039 } 2040 2041 out: 2042 up_write(&ndev->reslock); 2043 } 2044 2045 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) 2046 { 2047 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2048 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2049 struct mlx5_vdpa_virtqueue *mvq; 2050 2051 if (!is_index_valid(mvdev, idx)) 2052 return; 2053 2054 if (unlikely(is_ctrl_vq_idx(mvdev, idx))) { 2055 if (!mvdev->wq || !mvdev->cvq.ready) 2056 return; 2057 2058 queue_work(mvdev->wq, &ndev->cvq_ent.work); 2059 return; 2060 } 2061 2062 mvq = &ndev->vqs[idx]; 2063 if (unlikely(!mvq->ready)) 2064 return; 2065 2066 iowrite16(idx, ndev->mvdev.res.kick_addr); 2067 } 2068 2069 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area, 2070 u64 driver_area, u64 device_area) 2071 { 2072 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2073 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2074 struct mlx5_vdpa_virtqueue *mvq; 2075 2076 if (!is_index_valid(mvdev, idx)) 2077 return -EINVAL; 2078 2079 if (is_ctrl_vq_idx(mvdev, idx)) { 2080 mvdev->cvq.desc_addr = desc_area; 2081 mvdev->cvq.device_addr = device_area; 2082 mvdev->cvq.driver_addr = driver_area; 2083 return 0; 2084 } 2085 2086 mvq = &ndev->vqs[idx]; 2087 mvq->desc_addr = desc_area; 2088 mvq->device_addr = device_area; 2089 mvq->driver_addr = driver_area; 2090 return 0; 2091 } 2092 2093 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) 2094 { 2095 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2096 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2097 struct mlx5_vdpa_virtqueue *mvq; 2098 2099 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) 2100 return; 2101 2102 mvq = &ndev->vqs[idx]; 2103 mvq->num_ent = num; 2104 } 2105 2106 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb) 2107 { 2108 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2109 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2110 2111 ndev->event_cbs[idx] = *cb; 2112 if (is_ctrl_vq_idx(mvdev, idx)) 2113 mvdev->cvq.event_cb = *cb; 2114 } 2115 2116 static void mlx5_cvq_notify(struct vringh *vring) 2117 { 2118 struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring); 2119 2120 if (!cvq->event_cb.callback) 2121 return; 2122 2123 cvq->event_cb.callback(cvq->event_cb.private); 2124 } 2125 2126 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready) 2127 { 2128 struct mlx5_control_vq *cvq = &mvdev->cvq; 2129 2130 cvq->ready = ready; 2131 if (!ready) 2132 return; 2133 2134 cvq->vring.notify = mlx5_cvq_notify; 2135 } 2136 2137 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready) 2138 { 2139 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2140 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2141 struct mlx5_vdpa_virtqueue *mvq; 2142 int err; 2143 2144 if (!mvdev->actual_features) 2145 return; 2146 2147 if (!is_index_valid(mvdev, idx)) 2148 return; 2149 2150 if (is_ctrl_vq_idx(mvdev, idx)) { 2151 set_cvq_ready(mvdev, ready); 2152 return; 2153 } 2154 2155 mvq = &ndev->vqs[idx]; 2156 if (!ready) { 2157 suspend_vq(ndev, mvq); 2158 } else { 2159 err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); 2160 if (err) { 2161 mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err); 2162 ready = false; 2163 } 2164 } 2165 2166 2167 mvq->ready = ready; 2168 } 2169 2170 static bool 
mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx) 2171 { 2172 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2173 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2174 2175 if (!is_index_valid(mvdev, idx)) 2176 return false; 2177 2178 if (is_ctrl_vq_idx(mvdev, idx)) 2179 return mvdev->cvq.ready; 2180 2181 return ndev->vqs[idx].ready; 2182 } 2183 2184 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, 2185 const struct vdpa_vq_state *state) 2186 { 2187 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2188 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2189 struct mlx5_vdpa_virtqueue *mvq; 2190 2191 if (!is_index_valid(mvdev, idx)) 2192 return -EINVAL; 2193 2194 if (is_ctrl_vq_idx(mvdev, idx)) { 2195 mvdev->cvq.vring.last_avail_idx = state->split.avail_index; 2196 return 0; 2197 } 2198 2199 mvq = &ndev->vqs[idx]; 2200 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) { 2201 mlx5_vdpa_warn(mvdev, "can't modify available index\n"); 2202 return -EINVAL; 2203 } 2204 2205 mvq->used_idx = state->split.avail_index; 2206 mvq->avail_idx = state->split.avail_index; 2207 return 0; 2208 } 2209 2210 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state) 2211 { 2212 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2213 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2214 struct mlx5_vdpa_virtqueue *mvq; 2215 struct mlx5_virtq_attr attr; 2216 int err; 2217 2218 if (!is_index_valid(mvdev, idx)) 2219 return -EINVAL; 2220 2221 if (is_ctrl_vq_idx(mvdev, idx)) { 2222 state->split.avail_index = mvdev->cvq.vring.last_avail_idx; 2223 return 0; 2224 } 2225 2226 mvq = &ndev->vqs[idx]; 2227 /* If the virtq object was destroyed, use the value saved at 2228 * the last minute of suspend_vq. This caters for userspace 2229 * that cares about emulating the index after vq is stopped. 2230 */ 2231 if (!mvq->initialized) { 2232 /* Firmware returns a wrong value for the available index. 2233 * Since both values should be identical, we take the value of 2234 * used_idx which is reported correctly. 
2235 */ 2236 state->split.avail_index = mvq->used_idx; 2237 return 0; 2238 } 2239 2240 err = query_virtqueue(ndev, mvq, &attr); 2241 if (err) { 2242 mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n"); 2243 return err; 2244 } 2245 state->split.avail_index = attr.used_index; 2246 return 0; 2247 } 2248 2249 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev) 2250 { 2251 return PAGE_SIZE; 2252 } 2253 2254 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx) 2255 { 2256 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2257 2258 if (is_ctrl_vq_idx(mvdev, idx)) 2259 return MLX5_VDPA_CVQ_GROUP; 2260 2261 return MLX5_VDPA_DATAVQ_GROUP; 2262 } 2263 2264 static u64 mlx_to_vritio_features(u16 dev_features) 2265 { 2266 u64 result = 0; 2267 2268 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF)) 2269 result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 2270 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN)) 2271 result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN); 2272 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN)) 2273 result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN); 2274 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6)) 2275 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6); 2276 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4)) 2277 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4); 2278 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM)) 2279 result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM); 2280 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM)) 2281 result |= BIT_ULL(VIRTIO_NET_F_CSUM); 2282 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6)) 2283 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6); 2284 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4)) 2285 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4); 2286 2287 return result; 2288 } 2289 2290 static u64 get_supported_features(struct mlx5_core_dev *mdev) 2291 { 2292 u64 mlx_vdpa_features = 0; 2293 u16 dev_features; 2294 2295 dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask); 2296 mlx_vdpa_features |= mlx_to_vritio_features(dev_features); 2297 if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0)) 2298 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1); 2299 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM); 2300 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ); 2301 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); 2302 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ); 2303 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS); 2304 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU); 2305 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN); 2306 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC); 2307 2308 return mlx_vdpa_features; 2309 } 2310 2311 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev) 2312 { 2313 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2314 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2315 2316 print_features(mvdev, ndev->mvdev.mlx_features, false); 2317 return ndev->mvdev.mlx_features; 2318 } 2319 2320 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) 2321 { 2322 /* Minimum features to expect */ 2323 if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) 2324 return -EOPNOTSUPP; 2325 2326 /* Double check features combination sent down by the driver. 2327 * Fail invalid features due to absence of the depended feature. 2328 * 2329 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit 2330 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ". 
* By failing invalid feature combinations sent down by untrusted drivers,
2332 * we ensure that the assumptions made by is_index_valid() and
2333 * is_ctrl_vq_idx() are not compromised.
2334 */
2335 if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2336 BIT_ULL(VIRTIO_NET_F_MQ))
2337 return -EINVAL;
2338
2339 return 0;
2340 }
2341
2342 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2343 {
2344 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2345 int err;
2346 int i;
2347
2348 for (i = 0; i < mvdev->max_vqs; i++) {
2349 err = setup_vq(ndev, &ndev->vqs[i]);
2350 if (err)
2351 goto err_vq;
2352 }
2353
2354 return 0;
2355
2356 err_vq:
2357 for (--i; i >= 0; i--)
2358 teardown_vq(ndev, &ndev->vqs[i]);
2359
2360 return err;
2361 }
2362
2363 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2364 {
2365 struct mlx5_vdpa_virtqueue *mvq;
2366 int i;
2367
2368 for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2369 mvq = &ndev->vqs[i];
2370 if (!mvq->initialized)
2371 continue;
2372
2373 teardown_vq(ndev, mvq);
2374 }
2375 }
2376
2377 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2378 {
2379 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2380 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2381 /* MQ supported. CVQ index is right above the last data virtqueue's */
2382 mvdev->max_idx = mvdev->max_vqs;
2383 } else {
2384 /* Only CVQ supported. Data virtqueues occupy indices 0 and 1.
2385 * CVQ gets index 2.
2386 */
2387 mvdev->max_idx = 2;
2388 }
2389 } else {
2390 /* Two data virtqueues only: one for rx and one for tx */
2391 mvdev->max_idx = 1;
2392 }
2393 }
2394
2395 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2396 {
2397 u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2398 u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2399 int err;
2400
2401 MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2402 MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2403 MLX5_SET(query_vport_state_in, in, vport_number, vport);
2404 if (vport)
2405 MLX5_SET(query_vport_state_in, in, other_vport, 1);
2406
2407 err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2408 if (err)
2409 return 0;
2410
2411 return MLX5_GET(query_vport_state_out, out, state);
2412 }
2413
2414 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2415 {
2416 if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2417 VPORT_STATE_UP)
2418 return true;
2419
2420 return false;
2421 }
2422
2423 static void update_carrier(struct work_struct *work)
2424 {
2425 struct mlx5_vdpa_wq_ent *wqent;
2426 struct mlx5_vdpa_dev *mvdev;
2427 struct mlx5_vdpa_net *ndev;
2428
2429 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2430 mvdev = wqent->mvdev;
2431 ndev = to_mlx5_vdpa_ndev(mvdev);
2432 if (get_link_state(mvdev))
2433 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2434 else
2435 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2436
2437 if (ndev->config_cb.callback)
2438 ndev->config_cb.callback(ndev->config_cb.private);
2439
2440 kfree(wqent);
2441 }
2442
2443 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2444 {
2445 struct mlx5_vdpa_wq_ent *wqent;
2446
2447 wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2448 if (!wqent)
2449 return -ENOMEM;
2450
2451 wqent->mvdev = &ndev->mvdev;
2452 INIT_WORK(&wqent->work, update_carrier);
2453 queue_work(ndev->mvdev.wq, &wqent->work);
2454 return 0;
2455 }
2456
2457 static int
event_handler(struct notifier_block *nb, unsigned long event, void *param) 2458 { 2459 struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb); 2460 struct mlx5_eqe *eqe = param; 2461 int ret = NOTIFY_DONE; 2462 2463 if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { 2464 switch (eqe->sub_type) { 2465 case MLX5_PORT_CHANGE_SUBTYPE_DOWN: 2466 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: 2467 if (queue_link_work(ndev)) 2468 return NOTIFY_DONE; 2469 2470 ret = NOTIFY_OK; 2471 break; 2472 default: 2473 return NOTIFY_DONE; 2474 } 2475 return ret; 2476 } 2477 return ret; 2478 } 2479 2480 static void register_link_notifier(struct mlx5_vdpa_net *ndev) 2481 { 2482 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS))) 2483 return; 2484 2485 ndev->nb.notifier_call = event_handler; 2486 mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb); 2487 ndev->nb_registered = true; 2488 queue_link_work(ndev); 2489 } 2490 2491 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev) 2492 { 2493 if (!ndev->nb_registered) 2494 return; 2495 2496 ndev->nb_registered = false; 2497 mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb); 2498 if (ndev->mvdev.wq) 2499 flush_workqueue(ndev->mvdev.wq); 2500 } 2501 2502 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) 2503 { 2504 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2505 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2506 int err; 2507 2508 print_features(mvdev, features, true); 2509 2510 err = verify_driver_features(mvdev, features); 2511 if (err) 2512 return err; 2513 2514 ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; 2515 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)) 2516 ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs); 2517 else 2518 ndev->rqt_size = 1; 2519 2520 /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section 2521 * 5.1.6.5.5 "Device operation in multiqueue mode": 2522 * 2523 * Multiqueue is disabled by default. 2524 * The driver enables multiqueue by sending a command using class 2525 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue 2526 * operation, as follows: ... 
2527 */ 2528 ndev->cur_num_vqs = 2; 2529 2530 update_cvq_info(mvdev); 2531 return err; 2532 } 2533 2534 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb) 2535 { 2536 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2537 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2538 2539 ndev->config_cb = *cb; 2540 } 2541 2542 #define MLX5_VDPA_MAX_VQ_ENTRIES 256 2543 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev) 2544 { 2545 return MLX5_VDPA_MAX_VQ_ENTRIES; 2546 } 2547 2548 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev) 2549 { 2550 return VIRTIO_ID_NET; 2551 } 2552 2553 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev) 2554 { 2555 return PCI_VENDOR_ID_MELLANOX; 2556 } 2557 2558 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev) 2559 { 2560 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2561 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2562 2563 print_status(mvdev, ndev->mvdev.status, false); 2564 return ndev->mvdev.status; 2565 } 2566 2567 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 2568 { 2569 struct mlx5_vq_restore_info *ri = &mvq->ri; 2570 struct mlx5_virtq_attr attr = {}; 2571 int err; 2572 2573 if (mvq->initialized) { 2574 err = query_virtqueue(ndev, mvq, &attr); 2575 if (err) 2576 return err; 2577 } 2578 2579 ri->avail_index = attr.available_index; 2580 ri->used_index = attr.used_index; 2581 ri->ready = mvq->ready; 2582 ri->num_ent = mvq->num_ent; 2583 ri->desc_addr = mvq->desc_addr; 2584 ri->device_addr = mvq->device_addr; 2585 ri->driver_addr = mvq->driver_addr; 2586 ri->map = mvq->map; 2587 ri->restore = true; 2588 return 0; 2589 } 2590 2591 static int save_channels_info(struct mlx5_vdpa_net *ndev) 2592 { 2593 int i; 2594 2595 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2596 memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri)); 2597 save_channel_info(ndev, &ndev->vqs[i]); 2598 } 2599 return 0; 2600 } 2601 2602 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev) 2603 { 2604 int i; 2605 2606 for (i = 0; i < ndev->mvdev.max_vqs; i++) 2607 memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 2608 } 2609 2610 static void restore_channels_info(struct mlx5_vdpa_net *ndev) 2611 { 2612 struct mlx5_vdpa_virtqueue *mvq; 2613 struct mlx5_vq_restore_info *ri; 2614 int i; 2615 2616 mlx5_clear_vqs(ndev); 2617 init_mvqs(ndev); 2618 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2619 mvq = &ndev->vqs[i]; 2620 ri = &mvq->ri; 2621 if (!ri->restore) 2622 continue; 2623 2624 mvq->avail_idx = ri->avail_index; 2625 mvq->used_idx = ri->used_index; 2626 mvq->ready = ri->ready; 2627 mvq->num_ent = ri->num_ent; 2628 mvq->desc_addr = ri->desc_addr; 2629 mvq->device_addr = ri->device_addr; 2630 mvq->driver_addr = ri->driver_addr; 2631 mvq->map = ri->map; 2632 } 2633 } 2634 2635 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, 2636 struct vhost_iotlb *iotlb, unsigned int asid) 2637 { 2638 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2639 int err; 2640 2641 suspend_vqs(ndev); 2642 err = save_channels_info(ndev); 2643 if (err) 2644 goto err_mr; 2645 2646 teardown_driver(ndev); 2647 mlx5_vdpa_destroy_mr_asid(mvdev, asid); 2648 err = mlx5_vdpa_create_mr(mvdev, iotlb, asid); 2649 if (err) 2650 goto err_mr; 2651 2652 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended) 2653 goto err_mr; 2654 2655 restore_channels_info(ndev); 2656 err = setup_driver(mvdev); 2657 if (err) 2658 goto err_setup; 2659 2660 return 0; 2661 2662 err_setup: 
2663 mlx5_vdpa_destroy_mr_asid(mvdev, asid); 2664 err_mr: 2665 return err; 2666 } 2667 2668 /* reslock must be held for this function */ 2669 static int setup_driver(struct mlx5_vdpa_dev *mvdev) 2670 { 2671 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2672 int err; 2673 2674 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 2675 2676 if (ndev->setup) { 2677 mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n"); 2678 err = 0; 2679 goto out; 2680 } 2681 mlx5_vdpa_add_debugfs(ndev); 2682 err = setup_virtqueues(mvdev); 2683 if (err) { 2684 mlx5_vdpa_warn(mvdev, "setup_virtqueues\n"); 2685 goto err_setup; 2686 } 2687 2688 err = create_rqt(ndev); 2689 if (err) { 2690 mlx5_vdpa_warn(mvdev, "create_rqt\n"); 2691 goto err_rqt; 2692 } 2693 2694 err = create_tir(ndev); 2695 if (err) { 2696 mlx5_vdpa_warn(mvdev, "create_tir\n"); 2697 goto err_tir; 2698 } 2699 2700 err = setup_steering(ndev); 2701 if (err) { 2702 mlx5_vdpa_warn(mvdev, "setup_steering\n"); 2703 goto err_fwd; 2704 } 2705 ndev->setup = true; 2706 2707 return 0; 2708 2709 err_fwd: 2710 destroy_tir(ndev); 2711 err_tir: 2712 destroy_rqt(ndev); 2713 err_rqt: 2714 teardown_virtqueues(ndev); 2715 err_setup: 2716 mlx5_vdpa_remove_debugfs(ndev->debugfs); 2717 out: 2718 return err; 2719 } 2720 2721 /* reslock must be held for this function */ 2722 static void teardown_driver(struct mlx5_vdpa_net *ndev) 2723 { 2724 2725 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 2726 2727 if (!ndev->setup) 2728 return; 2729 2730 mlx5_vdpa_remove_debugfs(ndev->debugfs); 2731 ndev->debugfs = NULL; 2732 teardown_steering(ndev); 2733 destroy_tir(ndev); 2734 destroy_rqt(ndev); 2735 teardown_virtqueues(ndev); 2736 ndev->setup = false; 2737 } 2738 2739 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) 2740 { 2741 int i; 2742 2743 for (i = 0; i < ndev->mvdev.max_vqs; i++) 2744 ndev->vqs[i].ready = false; 2745 2746 ndev->mvdev.cvq.ready = false; 2747 } 2748 2749 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) 2750 { 2751 struct mlx5_control_vq *cvq = &mvdev->cvq; 2752 int err = 0; 2753 2754 if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) 2755 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, 2756 MLX5_CVQ_MAX_ENT, false, 2757 (struct vring_desc *)(uintptr_t)cvq->desc_addr, 2758 (struct vring_avail *)(uintptr_t)cvq->driver_addr, 2759 (struct vring_used *)(uintptr_t)cvq->device_addr); 2760 2761 return err; 2762 } 2763 2764 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) 2765 { 2766 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2767 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2768 int err; 2769 2770 print_status(mvdev, status, true); 2771 2772 down_write(&ndev->reslock); 2773 2774 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { 2775 if (status & VIRTIO_CONFIG_S_DRIVER_OK) { 2776 err = setup_cvq_vring(mvdev); 2777 if (err) { 2778 mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n"); 2779 goto err_setup; 2780 } 2781 register_link_notifier(ndev); 2782 err = setup_driver(mvdev); 2783 if (err) { 2784 mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); 2785 goto err_driver; 2786 } 2787 } else { 2788 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); 2789 goto err_clear; 2790 } 2791 } 2792 2793 ndev->mvdev.status = status; 2794 up_write(&ndev->reslock); 2795 return; 2796 2797 err_driver: 2798 unregister_link_notifier(ndev); 2799 err_setup: 2800 mlx5_vdpa_destroy_mr(&ndev->mvdev); 2801 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; 2802 err_clear: 2803 
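/* All error paths end up here to drop reslock; the err_driver/err_setup
 * paths above have also latched VIRTIO_CONFIG_S_FAILED by this point.
 */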
up_write(&ndev->reslock); 2804 } 2805 2806 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev) 2807 { 2808 int i; 2809 2810 /* default mapping all groups are mapped to asid 0 */ 2811 for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++) 2812 mvdev->group2asid[i] = 0; 2813 } 2814 2815 static int mlx5_vdpa_reset(struct vdpa_device *vdev) 2816 { 2817 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2818 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2819 2820 print_status(mvdev, 0, true); 2821 mlx5_vdpa_info(mvdev, "performing device reset\n"); 2822 2823 down_write(&ndev->reslock); 2824 unregister_link_notifier(ndev); 2825 teardown_driver(ndev); 2826 clear_vqs_ready(ndev); 2827 mlx5_vdpa_destroy_mr(&ndev->mvdev); 2828 ndev->mvdev.status = 0; 2829 ndev->mvdev.suspended = false; 2830 ndev->cur_num_vqs = 0; 2831 ndev->mvdev.cvq.received_desc = 0; 2832 ndev->mvdev.cvq.completed_desc = 0; 2833 memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1)); 2834 ndev->mvdev.actual_features = 0; 2835 init_group_to_asid_map(mvdev); 2836 ++mvdev->generation; 2837 2838 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 2839 if (mlx5_vdpa_create_mr(mvdev, NULL, 0)) 2840 mlx5_vdpa_warn(mvdev, "create MR failed\n"); 2841 } 2842 up_write(&ndev->reslock); 2843 2844 return 0; 2845 } 2846 2847 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev) 2848 { 2849 return sizeof(struct virtio_net_config); 2850 } 2851 2852 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, 2853 unsigned int len) 2854 { 2855 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2856 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2857 2858 if (offset + len <= sizeof(struct virtio_net_config)) 2859 memcpy(buf, (u8 *)&ndev->config + offset, len); 2860 } 2861 2862 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, 2863 unsigned int len) 2864 { 2865 /* not supported */ 2866 } 2867 2868 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) 2869 { 2870 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2871 2872 return mvdev->generation; 2873 } 2874 2875 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, 2876 unsigned int asid) 2877 { 2878 bool change_map; 2879 int err; 2880 2881 err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid); 2882 if (err) { 2883 mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err); 2884 return err; 2885 } 2886 2887 if (change_map) 2888 err = mlx5_vdpa_change_map(mvdev, iotlb, asid); 2889 2890 return err; 2891 } 2892 2893 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, 2894 struct vhost_iotlb *iotlb) 2895 { 2896 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2897 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2898 int err = -EINVAL; 2899 2900 down_write(&ndev->reslock); 2901 err = set_map_data(mvdev, iotlb, asid); 2902 up_write(&ndev->reslock); 2903 return err; 2904 } 2905 2906 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx) 2907 { 2908 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2909 2910 if (is_ctrl_vq_idx(mvdev, idx)) 2911 return &vdev->dev; 2912 2913 return mvdev->vdev.dma_dev; 2914 } 2915 2916 static void free_irqs(struct mlx5_vdpa_net *ndev) 2917 { 2918 struct mlx5_vdpa_irq_pool_entry *ent; 2919 int i; 2920 2921 if (!msix_mode_supported(&ndev->mvdev)) 2922 return; 2923 2924 if (!ndev->irqp.entries) 2925 return; 2926 2927 for (i = ndev->irqp.num_ent - 1; i >= 0; i--) { 2928 ent = ndev->irqp.entries 
+ i; 2929 if (ent->map.virq) 2930 pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map); 2931 } 2932 kfree(ndev->irqp.entries); 2933 } 2934 2935 static void mlx5_vdpa_free(struct vdpa_device *vdev) 2936 { 2937 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2938 struct mlx5_core_dev *pfmdev; 2939 struct mlx5_vdpa_net *ndev; 2940 2941 ndev = to_mlx5_vdpa_ndev(mvdev); 2942 2943 free_resources(ndev); 2944 mlx5_vdpa_destroy_mr(mvdev); 2945 if (!is_zero_ether_addr(ndev->config.mac)) { 2946 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); 2947 mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); 2948 } 2949 mlx5_vdpa_free_resources(&ndev->mvdev); 2950 free_irqs(ndev); 2951 kfree(ndev->event_cbs); 2952 kfree(ndev->vqs); 2953 } 2954 2955 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx) 2956 { 2957 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2958 struct vdpa_notification_area ret = {}; 2959 struct mlx5_vdpa_net *ndev; 2960 phys_addr_t addr; 2961 2962 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) 2963 return ret; 2964 2965 /* If SF BAR size is smaller than PAGE_SIZE, do not use direct 2966 * notification to avoid the risk of mapping pages that contain BAR of more 2967 * than one SF 2968 */ 2969 if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT) 2970 return ret; 2971 2972 ndev = to_mlx5_vdpa_ndev(mvdev); 2973 addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr; 2974 ret.addr = addr; 2975 ret.size = PAGE_SIZE; 2976 return ret; 2977 } 2978 2979 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx) 2980 { 2981 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2982 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2983 struct mlx5_vdpa_virtqueue *mvq; 2984 2985 if (!is_index_valid(mvdev, idx)) 2986 return -EINVAL; 2987 2988 if (is_ctrl_vq_idx(mvdev, idx)) 2989 return -EOPNOTSUPP; 2990 2991 mvq = &ndev->vqs[idx]; 2992 if (!mvq->map.virq) 2993 return -EOPNOTSUPP; 2994 2995 return mvq->map.virq; 2996 } 2997 2998 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev) 2999 { 3000 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3001 3002 return mvdev->actual_features; 3003 } 3004 3005 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 3006 u64 *received_desc, u64 *completed_desc) 3007 { 3008 u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {}; 3009 u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {}; 3010 void *cmd_hdr; 3011 void *ctx; 3012 int err; 3013 3014 if (!counters_supported(&ndev->mvdev)) 3015 return -EOPNOTSUPP; 3016 3017 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 3018 return -EAGAIN; 3019 3020 cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr); 3021 3022 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 3023 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 3024 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 3025 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id); 3026 3027 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 3028 if (err) 3029 return err; 3030 3031 ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters); 3032 *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc); 3033 *completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc); 3034 return 0; 3035 } 3036 3037 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx, 3038 struct sk_buff *msg, 
3039 struct netlink_ext_ack *extack) 3040 { 3041 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3042 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3043 struct mlx5_vdpa_virtqueue *mvq; 3044 struct mlx5_control_vq *cvq; 3045 u64 received_desc; 3046 u64 completed_desc; 3047 int err = 0; 3048 3049 down_read(&ndev->reslock); 3050 if (!is_index_valid(mvdev, idx)) { 3051 NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid"); 3052 err = -EINVAL; 3053 goto out_err; 3054 } 3055 3056 if (idx == ctrl_vq_idx(mvdev)) { 3057 cvq = &mvdev->cvq; 3058 received_desc = cvq->received_desc; 3059 completed_desc = cvq->completed_desc; 3060 goto out; 3061 } 3062 3063 mvq = &ndev->vqs[idx]; 3064 err = counter_set_query(ndev, mvq, &received_desc, &completed_desc); 3065 if (err) { 3066 NL_SET_ERR_MSG_MOD(extack, "failed to query hardware"); 3067 goto out_err; 3068 } 3069 3070 out: 3071 err = -EMSGSIZE; 3072 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc")) 3073 goto out_err; 3074 3075 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc, 3076 VDPA_ATTR_PAD)) 3077 goto out_err; 3078 3079 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc")) 3080 goto out_err; 3081 3082 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc, 3083 VDPA_ATTR_PAD)) 3084 goto out_err; 3085 3086 err = 0; 3087 out_err: 3088 up_read(&ndev->reslock); 3089 return err; 3090 } 3091 3092 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev) 3093 { 3094 struct mlx5_control_vq *cvq; 3095 3096 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 3097 return; 3098 3099 cvq = &mvdev->cvq; 3100 cvq->ready = false; 3101 } 3102 3103 static int mlx5_vdpa_suspend(struct vdpa_device *vdev) 3104 { 3105 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3106 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3107 struct mlx5_vdpa_virtqueue *mvq; 3108 int i; 3109 3110 mlx5_vdpa_info(mvdev, "suspending device\n"); 3111 3112 down_write(&ndev->reslock); 3113 unregister_link_notifier(ndev); 3114 for (i = 0; i < ndev->cur_num_vqs; i++) { 3115 mvq = &ndev->vqs[i]; 3116 suspend_vq(ndev, mvq); 3117 } 3118 mlx5_vdpa_cvq_suspend(mvdev); 3119 mvdev->suspended = true; 3120 up_write(&ndev->reslock); 3121 return 0; 3122 } 3123 3124 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, 3125 unsigned int asid) 3126 { 3127 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3128 3129 if (group >= MLX5_VDPA_NUMVQ_GROUPS) 3130 return -EINVAL; 3131 3132 mvdev->group2asid[group] = asid; 3133 return 0; 3134 } 3135 3136 static const struct vdpa_config_ops mlx5_vdpa_ops = { 3137 .set_vq_address = mlx5_vdpa_set_vq_address, 3138 .set_vq_num = mlx5_vdpa_set_vq_num, 3139 .kick_vq = mlx5_vdpa_kick_vq, 3140 .set_vq_cb = mlx5_vdpa_set_vq_cb, 3141 .set_vq_ready = mlx5_vdpa_set_vq_ready, 3142 .get_vq_ready = mlx5_vdpa_get_vq_ready, 3143 .set_vq_state = mlx5_vdpa_set_vq_state, 3144 .get_vq_state = mlx5_vdpa_get_vq_state, 3145 .get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats, 3146 .get_vq_notification = mlx5_get_vq_notification, 3147 .get_vq_irq = mlx5_get_vq_irq, 3148 .get_vq_align = mlx5_vdpa_get_vq_align, 3149 .get_vq_group = mlx5_vdpa_get_vq_group, 3150 .get_device_features = mlx5_vdpa_get_device_features, 3151 .set_driver_features = mlx5_vdpa_set_driver_features, 3152 .get_driver_features = mlx5_vdpa_get_driver_features, 3153 .set_config_cb = mlx5_vdpa_set_config_cb, 3154 .get_vq_num_max = mlx5_vdpa_get_vq_num_max, 3155 .get_device_id = mlx5_vdpa_get_device_id, 
3156 .get_vendor_id = mlx5_vdpa_get_vendor_id, 3157 .get_status = mlx5_vdpa_get_status, 3158 .set_status = mlx5_vdpa_set_status, 3159 .reset = mlx5_vdpa_reset, 3160 .get_config_size = mlx5_vdpa_get_config_size, 3161 .get_config = mlx5_vdpa_get_config, 3162 .set_config = mlx5_vdpa_set_config, 3163 .get_generation = mlx5_vdpa_get_generation, 3164 .set_map = mlx5_vdpa_set_map, 3165 .set_group_asid = mlx5_set_group_asid, 3166 .get_vq_dma_dev = mlx5_get_vq_dma_dev, 3167 .free = mlx5_vdpa_free, 3168 .suspend = mlx5_vdpa_suspend, 3169 }; 3170 3171 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) 3172 { 3173 u16 hw_mtu; 3174 int err; 3175 3176 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); 3177 if (err) 3178 return err; 3179 3180 *mtu = hw_mtu - MLX5V_ETH_HARD_MTU; 3181 return 0; 3182 } 3183 3184 static int alloc_resources(struct mlx5_vdpa_net *ndev) 3185 { 3186 struct mlx5_vdpa_net_resources *res = &ndev->res; 3187 int err; 3188 3189 if (res->valid) { 3190 mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n"); 3191 return -EEXIST; 3192 } 3193 3194 err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn); 3195 if (err) 3196 return err; 3197 3198 err = create_tis(ndev); 3199 if (err) 3200 goto err_tis; 3201 3202 res->valid = true; 3203 3204 return 0; 3205 3206 err_tis: 3207 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 3208 return err; 3209 } 3210 3211 static void free_resources(struct mlx5_vdpa_net *ndev) 3212 { 3213 struct mlx5_vdpa_net_resources *res = &ndev->res; 3214 3215 if (!res->valid) 3216 return; 3217 3218 destroy_tis(ndev); 3219 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 3220 res->valid = false; 3221 } 3222 3223 static void init_mvqs(struct mlx5_vdpa_net *ndev) 3224 { 3225 struct mlx5_vdpa_virtqueue *mvq; 3226 int i; 3227 3228 for (i = 0; i < ndev->mvdev.max_vqs; ++i) { 3229 mvq = &ndev->vqs[i]; 3230 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 3231 mvq->index = i; 3232 mvq->ndev = ndev; 3233 mvq->fwqp.fw = true; 3234 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; 3235 } 3236 for (; i < ndev->mvdev.max_vqs; i++) { 3237 mvq = &ndev->vqs[i]; 3238 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 3239 mvq->index = i; 3240 mvq->ndev = ndev; 3241 } 3242 } 3243 3244 struct mlx5_vdpa_mgmtdev { 3245 struct vdpa_mgmt_dev mgtdev; 3246 struct mlx5_adev *madev; 3247 struct mlx5_vdpa_net *ndev; 3248 }; 3249 3250 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu) 3251 { 3252 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); 3253 void *in; 3254 int err; 3255 3256 in = kvzalloc(inlen, GFP_KERNEL); 3257 if (!in) 3258 return -ENOMEM; 3259 3260 MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1); 3261 MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, 3262 mtu + MLX5V_ETH_HARD_MTU); 3263 MLX5_SET(modify_nic_vport_context_in, in, opcode, 3264 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); 3265 3266 err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); 3267 3268 kvfree(in); 3269 return err; 3270 } 3271 3272 static void allocate_irqs(struct mlx5_vdpa_net *ndev) 3273 { 3274 struct mlx5_vdpa_irq_pool_entry *ent; 3275 int i; 3276 3277 if (!msix_mode_supported(&ndev->mvdev)) 3278 return; 3279 3280 if (!ndev->mvdev.mdev->pdev) 3281 return; 3282 3283 ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL); 3284 if (!ndev->irqp.entries) 3285 return; 3286 3287 3288 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 3289 ent = ndev->irqp.entries + i; 3290 
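/* The allocation below is best-effort: on the first pci_msix_alloc_irq_at()
 * failure the function returns early, leaving irqp.num_ent at the number of
 * vectors obtained so far. Queues without a dedicated vector simply report
 * -EOPNOTSUPP from mlx5_get_vq_irq().
 */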
snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", 3291 dev_name(&ndev->mvdev.vdev.dev), i); 3292 ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL); 3293 if (!ent->map.virq) 3294 return; 3295 3296 ndev->irqp.num_ent++; 3297 } 3298 } 3299 3300 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, 3301 const struct vdpa_dev_set_config *add_config) 3302 { 3303 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); 3304 struct virtio_net_config *config; 3305 struct mlx5_core_dev *pfmdev; 3306 struct mlx5_vdpa_dev *mvdev; 3307 struct mlx5_vdpa_net *ndev; 3308 struct mlx5_core_dev *mdev; 3309 u64 device_features; 3310 u32 max_vqs; 3311 u16 mtu; 3312 int err; 3313 3314 if (mgtdev->ndev) 3315 return -ENOSPC; 3316 3317 mdev = mgtdev->madev->mdev; 3318 device_features = mgtdev->mgtdev.supported_features; 3319 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { 3320 if (add_config->device_features & ~device_features) { 3321 dev_warn(mdev->device, 3322 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n", 3323 add_config->device_features, device_features); 3324 return -EINVAL; 3325 } 3326 device_features &= add_config->device_features; 3327 } else { 3328 device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 3329 } 3330 if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) && 3331 device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) { 3332 dev_warn(mdev->device, 3333 "Must provision minimum features 0x%llx for this device", 3334 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)); 3335 return -EOPNOTSUPP; 3336 } 3337 3338 if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) & 3339 MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) { 3340 dev_warn(mdev->device, "missing support for split virtqueues\n"); 3341 return -EOPNOTSUPP; 3342 } 3343 3344 max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues), 3345 1 << MLX5_CAP_GEN(mdev, log_max_rqt_size)); 3346 if (max_vqs < 2) { 3347 dev_warn(mdev->device, 3348 "%d virtqueues are supported. 
At least 2 are required\n",
3349 max_vqs);
3350 return -EAGAIN;
3351 }
3352
3353 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3354 if (add_config->net.max_vq_pairs > max_vqs / 2)
3355 return -EINVAL;
3356 max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3357 } else {
3358 max_vqs = 2;
3359 }
3360
3361 ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
3362 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3363 if (IS_ERR(ndev))
3364 return PTR_ERR(ndev);
3365
3366 ndev->mvdev.max_vqs = max_vqs;
3367 mvdev = &ndev->mvdev;
3368 mvdev->mdev = mdev;
3369
3370 ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3371 ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3372 if (!ndev->vqs || !ndev->event_cbs) {
3373 err = -ENOMEM;
3374 goto err_alloc;
3375 }
3376
3377 init_mvqs(ndev);
3378 allocate_irqs(ndev);
3379 init_rwsem(&ndev->reslock);
3380 config = &ndev->config;
3381
3382 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3383 err = config_func_mtu(mdev, add_config->net.mtu);
3384 if (err)
3385 goto err_alloc;
3386 }
3387
3388 if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3389 err = query_mtu(mdev, &mtu);
3390 if (err)
3391 goto err_alloc;
3392
3393 ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3394 }
3395
3396 if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3397 if (get_link_state(mvdev))
3398 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3399 else
3400 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3401 }
3402
3403 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3404 memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3405 /* Don't bother setting the mac address in config if _F_MAC is not going to be provisioned */
3406 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3407 device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3408 err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3409 if (err)
3410 goto err_alloc;
3411 }
3412
3413 if (!is_zero_ether_addr(config->mac)) {
3414 pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3415 err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3416 if (err)
3417 goto err_alloc;
3418 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3419 /*
3420 * We used to clear the _F_MAC feature bit when a zero
3421 * mac address was seen and device features were not
3422 * explicitly provisioned. Keep that behaviour
3423 * so old scripts do not break.
3424 */ 3425 device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC); 3426 } else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) { 3427 /* Don't provision zero mac address for _F_MAC */ 3428 mlx5_vdpa_warn(&ndev->mvdev, 3429 "No mac address provisioned?\n"); 3430 err = -EINVAL; 3431 goto err_alloc; 3432 } 3433 3434 if (device_features & BIT_ULL(VIRTIO_NET_F_MQ)) 3435 config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2); 3436 3437 ndev->mvdev.mlx_features = device_features; 3438 mvdev->vdev.dma_dev = &mdev->pdev->dev; 3439 err = mlx5_vdpa_alloc_resources(&ndev->mvdev); 3440 if (err) 3441 goto err_mpfs; 3442 3443 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 3444 err = mlx5_vdpa_create_mr(mvdev, NULL, 0); 3445 if (err) 3446 goto err_res; 3447 } 3448 3449 err = alloc_resources(ndev); 3450 if (err) 3451 goto err_mr; 3452 3453 ndev->cvq_ent.mvdev = mvdev; 3454 INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler); 3455 mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq"); 3456 if (!mvdev->wq) { 3457 err = -ENOMEM; 3458 goto err_res2; 3459 } 3460 3461 mvdev->vdev.mdev = &mgtdev->mgtdev; 3462 err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1); 3463 if (err) 3464 goto err_reg; 3465 3466 mgtdev->ndev = ndev; 3467 return 0; 3468 3469 err_reg: 3470 destroy_workqueue(mvdev->wq); 3471 err_res2: 3472 free_resources(ndev); 3473 err_mr: 3474 mlx5_vdpa_destroy_mr(mvdev); 3475 err_res: 3476 mlx5_vdpa_free_resources(&ndev->mvdev); 3477 err_mpfs: 3478 if (!is_zero_ether_addr(config->mac)) 3479 mlx5_mpfs_del_mac(pfmdev, config->mac); 3480 err_alloc: 3481 put_device(&mvdev->vdev.dev); 3482 return err; 3483 } 3484 3485 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev) 3486 { 3487 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); 3488 struct mlx5_vdpa_dev *mvdev = to_mvdev(dev); 3489 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3490 struct workqueue_struct *wq; 3491 3492 mlx5_vdpa_remove_debugfs(ndev->debugfs); 3493 ndev->debugfs = NULL; 3494 unregister_link_notifier(ndev); 3495 _vdpa_unregister_device(dev); 3496 wq = mvdev->wq; 3497 mvdev->wq = NULL; 3498 destroy_workqueue(wq); 3499 mgtdev->ndev = NULL; 3500 } 3501 3502 static const struct vdpa_mgmtdev_ops mdev_ops = { 3503 .dev_add = mlx5_vdpa_dev_add, 3504 .dev_del = mlx5_vdpa_dev_del, 3505 }; 3506 3507 static struct virtio_device_id id_table[] = { 3508 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 3509 { 0 }, 3510 }; 3511 3512 static int mlx5v_probe(struct auxiliary_device *adev, 3513 const struct auxiliary_device_id *id) 3514 3515 { 3516 struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev); 3517 struct mlx5_core_dev *mdev = madev->mdev; 3518 struct mlx5_vdpa_mgmtdev *mgtdev; 3519 int err; 3520 3521 mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL); 3522 if (!mgtdev) 3523 return -ENOMEM; 3524 3525 mgtdev->mgtdev.ops = &mdev_ops; 3526 mgtdev->mgtdev.device = mdev->device; 3527 mgtdev->mgtdev.id_table = id_table; 3528 mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | 3529 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) | 3530 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) | 3531 BIT_ULL(VDPA_ATTR_DEV_FEATURES); 3532 mgtdev->mgtdev.max_supported_vqs = 3533 MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1; 3534 mgtdev->mgtdev.supported_features = get_supported_features(mdev); 3535 mgtdev->madev = madev; 3536 3537 err = vdpa_mgmtdev_register(&mgtdev->mgtdev); 3538 if (err) 3539 goto reg_err; 3540 3541 auxiliary_set_drvdata(adev, mgtdev); 3542 
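/* The mgmtdev pointer stashed in drvdata is what mlx5v_remove() retrieves
 * to unregister the management device and free it.
 */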
3543 return 0; 3544 3545 reg_err: 3546 kfree(mgtdev); 3547 return err; 3548 } 3549 3550 static void mlx5v_remove(struct auxiliary_device *adev) 3551 { 3552 struct mlx5_vdpa_mgmtdev *mgtdev; 3553 3554 mgtdev = auxiliary_get_drvdata(adev); 3555 vdpa_mgmtdev_unregister(&mgtdev->mgtdev); 3556 kfree(mgtdev); 3557 } 3558 3559 static const struct auxiliary_device_id mlx5v_id_table[] = { 3560 { .name = MLX5_ADEV_NAME ".vnet", }, 3561 {}, 3562 }; 3563 3564 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table); 3565 3566 static struct auxiliary_driver mlx5v_driver = { 3567 .name = "vnet", 3568 .probe = mlx5v_probe, 3569 .remove = mlx5v_remove, 3570 .id_table = mlx5v_id_table, 3571 }; 3572 3573 module_auxiliary_driver(mlx5v_driver); 3574
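/* Usage sketch (illustrative, not part of the driver): once this auxiliary
 * driver binds to the mlx5_core ".vnet" auxiliary device, it registers a vdpa
 * management device whose config_attr_mask (see mlx5v_probe() above) allows
 * provisioning the MAC address, the number of VQ pairs, the MTU and the
 * device features. With the iproute2 `vdpa` tool this would look roughly like:
 *
 *   vdpa mgmtdev show
 *   vdpa dev add name vdpa0 mgmtdev <mgmtdev name> mac 52:54:00:aa:bb:cc max_vqp 4
 *   vdpa dev del vdpa0
 *
 * Exact option names depend on the iproute2 version; the created device is
 * then driven by a vdpa bus driver such as virtio_vdpa or vhost_vdpa.
 */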