1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * VDPA simulator for networking device. 4 * 5 * Copyright (c) 2020, Red Hat Inc. All rights reserved. 6 * Author: Jason Wang <jasowang@redhat.com> 7 * 8 */ 9 10 #include <linux/init.h> 11 #include <linux/module.h> 12 #include <linux/device.h> 13 #include <linux/kernel.h> 14 #include <linux/sched.h> 15 #include <linux/etherdevice.h> 16 #include <linux/vringh.h> 17 #include <linux/vdpa.h> 18 #include <net/netlink.h> 19 #include <uapi/linux/virtio_net.h> 20 #include <uapi/linux/vdpa.h> 21 22 #include "vdpa_sim.h" 23 24 #define DRV_VERSION "0.1" 25 #define DRV_AUTHOR "Jason Wang <jasowang@redhat.com>" 26 #define DRV_DESC "vDPA Device Simulator for networking device" 27 #define DRV_LICENSE "GPL v2" 28 29 #define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \ 30 (1ULL << VIRTIO_NET_F_MAC) | \ 31 (1ULL << VIRTIO_NET_F_STATUS) | \ 32 (1ULL << VIRTIO_NET_F_MTU) | \ 33 (1ULL << VIRTIO_NET_F_CTRL_VQ) | \ 34 (1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR)) 35 36 /* 3 virtqueues, 2 address spaces, 2 virtqueue groups */ 37 #define VDPASIM_NET_VQ_NUM 3 38 #define VDPASIM_NET_AS_NUM 2 39 #define VDPASIM_NET_GROUP_NUM 2 40 41 struct vdpasim_dataq_stats { 42 struct u64_stats_sync syncp; 43 u64 pkts; 44 u64 bytes; 45 u64 drops; 46 u64 errors; 47 u64 overruns; 48 }; 49 50 struct vdpasim_cq_stats { 51 struct u64_stats_sync syncp; 52 u64 requests; 53 u64 successes; 54 u64 errors; 55 }; 56 57 struct vdpasim_net{ 58 struct vdpasim vdpasim; 59 struct vdpasim_dataq_stats tx_stats; 60 struct vdpasim_dataq_stats rx_stats; 61 struct vdpasim_cq_stats cq_stats; 62 }; 63 64 static struct vdpasim_net *sim_to_net(struct vdpasim *vdpasim) 65 { 66 return container_of(vdpasim, struct vdpasim_net, vdpasim); 67 } 68 69 static void vdpasim_net_complete(struct vdpasim_virtqueue *vq, size_t len) 70 { 71 /* Make sure data is wrote before advancing index */ 72 smp_wmb(); 73 74 vringh_complete_iotlb(&vq->vring, vq->head, len); 75 76 /* Make sure used is visible before rasing the interrupt. */ 77 smp_wmb(); 78 79 local_bh_disable(); 80 if (vringh_need_notify_iotlb(&vq->vring) > 0) 81 vringh_notify(&vq->vring); 82 local_bh_enable(); 83 } 84 85 static bool receive_filter(struct vdpasim *vdpasim, size_t len) 86 { 87 bool modern = vdpasim->features & (1ULL << VIRTIO_F_VERSION_1); 88 size_t hdr_len = modern ? sizeof(struct virtio_net_hdr_v1) : 89 sizeof(struct virtio_net_hdr); 90 struct virtio_net_config *vio_config = vdpasim->config; 91 92 if (len < ETH_ALEN + hdr_len) 93 return false; 94 95 if (is_broadcast_ether_addr(vdpasim->buffer + hdr_len) || 96 is_multicast_ether_addr(vdpasim->buffer + hdr_len)) 97 return true; 98 if (!strncmp(vdpasim->buffer + hdr_len, vio_config->mac, ETH_ALEN)) 99 return true; 100 101 return false; 102 } 103 104 static virtio_net_ctrl_ack vdpasim_handle_ctrl_mac(struct vdpasim *vdpasim, 105 u8 cmd) 106 { 107 struct virtio_net_config *vio_config = vdpasim->config; 108 struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2]; 109 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 110 size_t read; 111 112 switch (cmd) { 113 case VIRTIO_NET_CTRL_MAC_ADDR_SET: 114 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov, 115 vio_config->mac, ETH_ALEN); 116 if (read == ETH_ALEN) 117 status = VIRTIO_NET_OK; 118 break; 119 default: 120 break; 121 } 122 123 return status; 124 } 125 126 static void vdpasim_handle_cvq(struct vdpasim *vdpasim) 127 { 128 struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2]; 129 struct vdpasim_net *net = sim_to_net(vdpasim); 130 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 131 struct virtio_net_ctrl_hdr ctrl; 132 size_t read, write; 133 u64 requests = 0, errors = 0, successes = 0; 134 int err; 135 136 if (!(vdpasim->features & (1ULL << VIRTIO_NET_F_CTRL_VQ))) 137 return; 138 139 if (!cvq->ready) 140 return; 141 142 while (true) { 143 err = vringh_getdesc_iotlb(&cvq->vring, &cvq->in_iov, 144 &cvq->out_iov, 145 &cvq->head, GFP_ATOMIC); 146 if (err <= 0) 147 break; 148 149 ++requests; 150 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov, &ctrl, 151 sizeof(ctrl)); 152 if (read != sizeof(ctrl)) { 153 ++errors; 154 break; 155 } 156 157 switch (ctrl.class) { 158 case VIRTIO_NET_CTRL_MAC: 159 status = vdpasim_handle_ctrl_mac(vdpasim, ctrl.cmd); 160 break; 161 default: 162 break; 163 } 164 165 if (status == VIRTIO_NET_OK) 166 ++successes; 167 else 168 ++errors; 169 170 /* Make sure data is wrote before advancing index */ 171 smp_wmb(); 172 173 write = vringh_iov_push_iotlb(&cvq->vring, &cvq->out_iov, 174 &status, sizeof(status)); 175 vringh_complete_iotlb(&cvq->vring, cvq->head, write); 176 vringh_kiov_cleanup(&cvq->in_iov); 177 vringh_kiov_cleanup(&cvq->out_iov); 178 179 /* Make sure used is visible before rasing the interrupt. */ 180 smp_wmb(); 181 182 local_bh_disable(); 183 if (cvq->cb) 184 cvq->cb(cvq->private); 185 local_bh_enable(); 186 } 187 188 u64_stats_update_begin(&net->cq_stats.syncp); 189 net->cq_stats.requests += requests; 190 net->cq_stats.errors += errors; 191 net->cq_stats.successes += successes; 192 u64_stats_update_end(&net->cq_stats.syncp); 193 } 194 195 static void vdpasim_net_work(struct work_struct *work) 196 { 197 struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); 198 struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; 199 struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; 200 struct vdpasim_net *net = sim_to_net(vdpasim); 201 ssize_t read, write; 202 u64 tx_pkts = 0, rx_pkts = 0, tx_bytes = 0, rx_bytes = 0; 203 u64 rx_drops = 0, rx_overruns = 0, rx_errors = 0, tx_errors = 0; 204 int err; 205 206 spin_lock(&vdpasim->lock); 207 208 if (!vdpasim->running) 209 goto out; 210 211 if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) 212 goto out; 213 214 vdpasim_handle_cvq(vdpasim); 215 216 if (!txq->ready || !rxq->ready) 217 goto out; 218 219 while (true) { 220 err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL, 221 &txq->head, GFP_ATOMIC); 222 if (err <= 0) { 223 if (err) 224 ++tx_errors; 225 break; 226 } 227 228 ++tx_pkts; 229 read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov, 230 vdpasim->buffer, 231 PAGE_SIZE); 232 233 tx_bytes += read; 234 235 if (!receive_filter(vdpasim, read)) { 236 ++rx_drops; 237 vdpasim_net_complete(txq, 0); 238 continue; 239 } 240 241 err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov, 242 &rxq->head, GFP_ATOMIC); 243 if (err <= 0) { 244 ++rx_overruns; 245 vdpasim_net_complete(txq, 0); 246 break; 247 } 248 249 write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov, 250 vdpasim->buffer, read); 251 if (write <= 0) { 252 ++rx_errors; 253 break; 254 } 255 256 ++rx_pkts; 257 rx_bytes += write; 258 259 vdpasim_net_complete(txq, 0); 260 vdpasim_net_complete(rxq, write); 261 262 if (tx_pkts > 4) { 263 schedule_work(&vdpasim->work); 264 goto out; 265 } 266 } 267 268 out: 269 spin_unlock(&vdpasim->lock); 270 271 u64_stats_update_begin(&net->tx_stats.syncp); 272 net->tx_stats.pkts += tx_pkts; 273 net->tx_stats.bytes += tx_bytes; 274 net->tx_stats.errors += tx_errors; 275 u64_stats_update_end(&net->tx_stats.syncp); 276 277 u64_stats_update_begin(&net->rx_stats.syncp); 278 net->rx_stats.pkts += rx_pkts; 279 net->rx_stats.bytes += rx_bytes; 280 net->rx_stats.drops += rx_drops; 281 net->rx_stats.errors += rx_errors; 282 net->rx_stats.overruns += rx_overruns; 283 u64_stats_update_end(&net->rx_stats.syncp); 284 } 285 286 static int vdpasim_net_get_stats(struct vdpasim *vdpasim, u16 idx, 287 struct sk_buff *msg, 288 struct netlink_ext_ack *extack) 289 { 290 struct vdpasim_net *net = sim_to_net(vdpasim); 291 u64 rx_pkts, rx_bytes, rx_errors, rx_overruns, rx_drops; 292 u64 tx_pkts, tx_bytes, tx_errors, tx_drops; 293 u64 cq_requests, cq_successes, cq_errors; 294 unsigned int start; 295 int err = -EMSGSIZE; 296 297 switch(idx) { 298 case 0: 299 do { 300 start = u64_stats_fetch_begin(&net->rx_stats.syncp); 301 rx_pkts = net->rx_stats.pkts; 302 rx_bytes = net->rx_stats.bytes; 303 rx_errors = net->rx_stats.errors; 304 rx_overruns = net->rx_stats.overruns; 305 rx_drops = net->rx_stats.drops; 306 } while (u64_stats_fetch_retry(&net->rx_stats.syncp, start)); 307 308 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, 309 "rx packets")) 310 break; 311 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, 312 rx_pkts, VDPA_ATTR_PAD)) 313 break; 314 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, 315 "rx bytes")) 316 break; 317 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, 318 rx_bytes, VDPA_ATTR_PAD)) 319 break; 320 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, 321 "rx errors")) 322 break; 323 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, 324 rx_errors, VDPA_ATTR_PAD)) 325 break; 326 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, 327 "rx overruns")) 328 break; 329 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, 330 rx_overruns, VDPA_ATTR_PAD)) 331 break; 332 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, 333 "rx drops")) 334 break; 335 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, 336 rx_drops, VDPA_ATTR_PAD)) 337 break; 338 err = 0; 339 break; 340 case 1: 341 do { 342 start = u64_stats_fetch_begin(&net->tx_stats.syncp); 343 tx_pkts = net->tx_stats.pkts; 344 tx_bytes = net->tx_stats.bytes; 345 tx_errors = net->tx_stats.errors; 346 tx_drops = net->tx_stats.drops; 347 } while (u64_stats_fetch_retry(&net->tx_stats.syncp, start)); 348 349 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, 350 "tx packets")) 351 break; 352 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, 353 tx_pkts, VDPA_ATTR_PAD)) 354 break; 355 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, 356 "tx bytes")) 357 break; 358 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, 359 tx_bytes, VDPA_ATTR_PAD)) 360 break; 361 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, 362 "tx errors")) 363 break; 364 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, 365 tx_errors, VDPA_ATTR_PAD)) 366 break; 367 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, 368 "tx drops")) 369 break; 370 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, 371 tx_drops, VDPA_ATTR_PAD)) 372 break; 373 err = 0; 374 break; 375 case 2: 376 do { 377 start = u64_stats_fetch_begin(&net->cq_stats.syncp); 378 cq_requests = net->cq_stats.requests; 379 cq_successes = net->cq_stats.successes; 380 cq_errors = net->cq_stats.errors; 381 } while (u64_stats_fetch_retry(&net->cq_stats.syncp, start)); 382 383 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, 384 "cvq requests")) 385 break; 386 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, 387 cq_requests, VDPA_ATTR_PAD)) 388 break; 389 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, 390 "cvq successes")) 391 break; 392 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, 393 cq_successes, VDPA_ATTR_PAD)) 394 break; 395 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, 396 "cvq errors")) 397 break; 398 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, 399 cq_errors, VDPA_ATTR_PAD)) 400 break; 401 err = 0; 402 break; 403 default: 404 err = -EINVAL; 405 break; 406 } 407 408 return err; 409 } 410 411 static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config) 412 { 413 struct virtio_net_config *net_config = config; 414 415 net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); 416 } 417 418 static void vdpasim_net_setup_config(struct vdpasim *vdpasim, 419 const struct vdpa_dev_set_config *config) 420 { 421 struct virtio_net_config *vio_config = vdpasim->config; 422 423 if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) 424 memcpy(vio_config->mac, config->net.mac, ETH_ALEN); 425 if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MTU)) 426 vio_config->mtu = cpu_to_vdpasim16(vdpasim, config->net.mtu); 427 else 428 /* Setup default MTU to be 1500 */ 429 vio_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); 430 } 431 432 static void vdpasim_net_mgmtdev_release(struct device *dev) 433 { 434 } 435 436 static struct device vdpasim_net_mgmtdev = { 437 .init_name = "vdpasim_net", 438 .release = vdpasim_net_mgmtdev_release, 439 }; 440 441 static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, 442 const struct vdpa_dev_set_config *config) 443 { 444 struct vdpasim_dev_attr dev_attr = {}; 445 struct vdpasim_net *net; 446 struct vdpasim *simdev; 447 int ret; 448 449 dev_attr.mgmt_dev = mdev; 450 dev_attr.name = name; 451 dev_attr.id = VIRTIO_ID_NET; 452 dev_attr.supported_features = VDPASIM_NET_FEATURES; 453 dev_attr.nvqs = VDPASIM_NET_VQ_NUM; 454 dev_attr.ngroups = VDPASIM_NET_GROUP_NUM; 455 dev_attr.nas = VDPASIM_NET_AS_NUM; 456 dev_attr.alloc_size = sizeof(struct vdpasim_net); 457 dev_attr.config_size = sizeof(struct virtio_net_config); 458 dev_attr.get_config = vdpasim_net_get_config; 459 dev_attr.work_fn = vdpasim_net_work; 460 dev_attr.get_stats = vdpasim_net_get_stats; 461 dev_attr.buffer_size = PAGE_SIZE; 462 463 simdev = vdpasim_create(&dev_attr, config); 464 if (IS_ERR(simdev)) 465 return PTR_ERR(simdev); 466 467 vdpasim_net_setup_config(simdev, config); 468 469 ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM); 470 if (ret) 471 goto reg_err; 472 473 net = sim_to_net(simdev); 474 475 u64_stats_init(&net->tx_stats.syncp); 476 u64_stats_init(&net->rx_stats.syncp); 477 u64_stats_init(&net->cq_stats.syncp); 478 479 return 0; 480 481 reg_err: 482 put_device(&simdev->vdpa.dev); 483 return ret; 484 } 485 486 static void vdpasim_net_dev_del(struct vdpa_mgmt_dev *mdev, 487 struct vdpa_device *dev) 488 { 489 struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa); 490 491 _vdpa_unregister_device(&simdev->vdpa); 492 } 493 494 static const struct vdpa_mgmtdev_ops vdpasim_net_mgmtdev_ops = { 495 .dev_add = vdpasim_net_dev_add, 496 .dev_del = vdpasim_net_dev_del 497 }; 498 499 static struct virtio_device_id id_table[] = { 500 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 501 { 0 }, 502 }; 503 504 static struct vdpa_mgmt_dev mgmt_dev = { 505 .device = &vdpasim_net_mgmtdev, 506 .id_table = id_table, 507 .ops = &vdpasim_net_mgmtdev_ops, 508 .config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR | 509 1 << VDPA_ATTR_DEV_NET_CFG_MTU | 510 1 << VDPA_ATTR_DEV_FEATURES), 511 .max_supported_vqs = VDPASIM_NET_VQ_NUM, 512 .supported_features = VDPASIM_NET_FEATURES, 513 }; 514 515 static int __init vdpasim_net_init(void) 516 { 517 int ret; 518 519 ret = device_register(&vdpasim_net_mgmtdev); 520 if (ret) { 521 put_device(&vdpasim_net_mgmtdev); 522 return ret; 523 } 524 525 ret = vdpa_mgmtdev_register(&mgmt_dev); 526 if (ret) 527 goto parent_err; 528 return 0; 529 530 parent_err: 531 device_unregister(&vdpasim_net_mgmtdev); 532 return ret; 533 } 534 535 static void __exit vdpasim_net_exit(void) 536 { 537 vdpa_mgmtdev_unregister(&mgmt_dev); 538 device_unregister(&vdpasim_net_mgmtdev); 539 } 540 541 module_init(vdpasim_net_init); 542 module_exit(vdpasim_net_exit); 543 544 MODULE_VERSION(DRV_VERSION); 545 MODULE_LICENSE(DRV_LICENSE); 546 MODULE_AUTHOR(DRV_AUTHOR); 547 MODULE_DESCRIPTION(DRV_DESC); 548