1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * VDPA simulator for networking device.
4 *
5 * Copyright (c) 2020, Red Hat Inc. All rights reserved.
6 * Author: Jason Wang <jasowang@redhat.com>
7 *
8 */
9
10 #include <linux/init.h>
11 #include <linux/module.h>
12 #include <linux/device.h>
13 #include <linux/kernel.h>
14 #include <linux/etherdevice.h>
15 #include <linux/vringh.h>
16 #include <linux/vdpa.h>
17 #include <net/netlink.h>
18 #include <uapi/linux/virtio_net.h>
19 #include <uapi/linux/vdpa.h>
20
21 #include "vdpa_sim.h"
22
/* Module metadata, consumed by the MODULE_*() declarations in this file. */
#define DRV_VERSION	"0.1"
#define DRV_AUTHOR	"Jason Wang <jasowang@redhat.com>"
#define DRV_DESC	"vDPA Device Simulator for networking device"
#define DRV_LICENSE	"GPL v2"

/*
 * Feature bits offered by the simulated net device: the generic simulator
 * features plus MAC, link status, MTU, the control virtqueue and setting the
 * MAC address through the control virtqueue.
 */
#define VDPASIM_NET_FEATURES \
	(VDPASIM_FEATURES | \
	 (1ULL << VIRTIO_NET_F_MAC) | \
	 (1ULL << VIRTIO_NET_F_STATUS) | \
	 (1ULL << VIRTIO_NET_F_MTU) | \
	 (1ULL << VIRTIO_NET_F_CTRL_VQ) | \
	 (1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR))

/*
 * Device topology: 3 virtqueues, 2 address spaces, 2 virtqueue groups.
 * Within this file vqs[0] is used as RX, vqs[1] as TX and vqs[2] as the
 * control virtqueue.
 */
#define VDPASIM_NET_VQ_NUM	3
#define VDPASIM_NET_AS_NUM	2
#define VDPASIM_NET_GROUP_NUM	2
39
40 struct vdpasim_dataq_stats {
41 struct u64_stats_sync syncp;
42 u64 pkts;
43 u64 bytes;
44 u64 drops;
45 u64 errors;
46 u64 overruns;
47 };
48
49 struct vdpasim_cq_stats {
50 struct u64_stats_sync syncp;
51 u64 requests;
52 u64 successes;
53 u64 errors;
54 };
55
56 struct vdpasim_net{
57 struct vdpasim vdpasim;
58 struct vdpasim_dataq_stats tx_stats;
59 struct vdpasim_dataq_stats rx_stats;
60 struct vdpasim_cq_stats cq_stats;
61 void *buffer;
62 };
63
sim_to_net(struct vdpasim * vdpasim)64 static struct vdpasim_net *sim_to_net(struct vdpasim *vdpasim)
65 {
66 return container_of(vdpasim, struct vdpasim_net, vdpasim);
67 }
68
/*
 * Complete the descriptor chain currently held in @vq->head, reporting @len
 * bytes written, then notify the driver if the ring asks for it.
 */
static void vdpasim_net_complete(struct vdpasim_virtqueue *vq, size_t len)
{
	struct vringh *vring = &vq->vring;

	/* Buffer contents must be written before the index is advanced. */
	smp_wmb();

	vringh_complete_iotlb(vring, vq->head, len);

	/* The used entry must be visible before the interrupt is raised. */
	smp_wmb();

	/* The notification is issued with bottom halves disabled. */
	local_bh_disable();
	if (vringh_need_notify_iotlb(vring) > 0)
		vringh_notify(vring);
	local_bh_enable();
}
84
receive_filter(struct vdpasim * vdpasim,size_t len)85 static bool receive_filter(struct vdpasim *vdpasim, size_t len)
86 {
87 bool modern = vdpasim->features & (1ULL << VIRTIO_F_VERSION_1);
88 size_t hdr_len = modern ? sizeof(struct virtio_net_hdr_v1) :
89 sizeof(struct virtio_net_hdr);
90 struct virtio_net_config *vio_config = vdpasim->config;
91 struct vdpasim_net *net = sim_to_net(vdpasim);
92
93 if (len < ETH_ALEN + hdr_len)
94 return false;
95
96 if (is_broadcast_ether_addr(net->buffer + hdr_len) ||
97 is_multicast_ether_addr(net->buffer + hdr_len))
98 return true;
99 if (!strncmp(net->buffer + hdr_len, vio_config->mac, ETH_ALEN))
100 return true;
101
102 return false;
103 }
104
vdpasim_handle_ctrl_mac(struct vdpasim * vdpasim,u8 cmd)105 static virtio_net_ctrl_ack vdpasim_handle_ctrl_mac(struct vdpasim *vdpasim,
106 u8 cmd)
107 {
108 struct virtio_net_config *vio_config = vdpasim->config;
109 struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2];
110 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
111 size_t read;
112
113 switch (cmd) {
114 case VIRTIO_NET_CTRL_MAC_ADDR_SET:
115 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov,
116 vio_config->mac, ETH_ALEN);
117 if (read == ETH_ALEN)
118 status = VIRTIO_NET_OK;
119 break;
120 default:
121 break;
122 }
123
124 return status;
125 }
126
vdpasim_handle_cvq(struct vdpasim * vdpasim)127 static void vdpasim_handle_cvq(struct vdpasim *vdpasim)
128 {
129 struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2];
130 struct vdpasim_net *net = sim_to_net(vdpasim);
131 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
132 struct virtio_net_ctrl_hdr ctrl;
133 size_t read, write;
134 u64 requests = 0, errors = 0, successes = 0;
135 int err;
136
137 if (!(vdpasim->features & (1ULL << VIRTIO_NET_F_CTRL_VQ)))
138 return;
139
140 if (!cvq->ready)
141 return;
142
143 while (true) {
144 err = vringh_getdesc_iotlb(&cvq->vring, &cvq->in_iov,
145 &cvq->out_iov,
146 &cvq->head, GFP_ATOMIC);
147 if (err <= 0)
148 break;
149
150 ++requests;
151 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov, &ctrl,
152 sizeof(ctrl));
153 if (read != sizeof(ctrl)) {
154 ++errors;
155 break;
156 }
157
158 switch (ctrl.class) {
159 case VIRTIO_NET_CTRL_MAC:
160 status = vdpasim_handle_ctrl_mac(vdpasim, ctrl.cmd);
161 break;
162 default:
163 break;
164 }
165
166 if (status == VIRTIO_NET_OK)
167 ++successes;
168 else
169 ++errors;
170
171 /* Make sure data is wrote before advancing index */
172 smp_wmb();
173
174 write = vringh_iov_push_iotlb(&cvq->vring, &cvq->out_iov,
175 &status, sizeof(status));
176 vringh_complete_iotlb(&cvq->vring, cvq->head, write);
177 vringh_kiov_cleanup(&cvq->in_iov);
178 vringh_kiov_cleanup(&cvq->out_iov);
179
180 /* Make sure used is visible before rasing the interrupt. */
181 smp_wmb();
182
183 local_bh_disable();
184 if (cvq->cb)
185 cvq->cb(cvq->private);
186 local_bh_enable();
187 }
188
189 u64_stats_update_begin(&net->cq_stats.syncp);
190 net->cq_stats.requests += requests;
191 net->cq_stats.errors += errors;
192 net->cq_stats.successes += successes;
193 u64_stats_update_end(&net->cq_stats.syncp);
194 }
195
vdpasim_net_work(struct vdpasim * vdpasim)196 static void vdpasim_net_work(struct vdpasim *vdpasim)
197 {
198 struct vdpasim_virtqueue *txq = &vdpasim->vqs[1];
199 struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0];
200 struct vdpasim_net *net = sim_to_net(vdpasim);
201 ssize_t read, write;
202 u64 tx_pkts = 0, rx_pkts = 0, tx_bytes = 0, rx_bytes = 0;
203 u64 rx_drops = 0, rx_overruns = 0, rx_errors = 0, tx_errors = 0;
204 int err;
205
206 mutex_lock(&vdpasim->mutex);
207
208 if (!vdpasim->running)
209 goto out;
210
211 if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
212 goto out;
213
214 vdpasim_handle_cvq(vdpasim);
215
216 if (!txq->ready || !rxq->ready)
217 goto out;
218
219 while (true) {
220 err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL,
221 &txq->head, GFP_ATOMIC);
222 if (err <= 0) {
223 if (err)
224 ++tx_errors;
225 break;
226 }
227
228 ++tx_pkts;
229 read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov,
230 net->buffer, PAGE_SIZE);
231
232 tx_bytes += read;
233
234 if (!receive_filter(vdpasim, read)) {
235 ++rx_drops;
236 vdpasim_net_complete(txq, 0);
237 continue;
238 }
239
240 err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov,
241 &rxq->head, GFP_ATOMIC);
242 if (err <= 0) {
243 ++rx_overruns;
244 vdpasim_net_complete(txq, 0);
245 break;
246 }
247
248 write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov,
249 net->buffer, read);
250 if (write <= 0) {
251 ++rx_errors;
252 break;
253 }
254
255 ++rx_pkts;
256 rx_bytes += write;
257
258 vdpasim_net_complete(txq, 0);
259 vdpasim_net_complete(rxq, write);
260
261 if (tx_pkts > 4) {
262 vdpasim_schedule_work(vdpasim);
263 goto out;
264 }
265 }
266
267 out:
268 mutex_unlock(&vdpasim->mutex);
269
270 u64_stats_update_begin(&net->tx_stats.syncp);
271 net->tx_stats.pkts += tx_pkts;
272 net->tx_stats.bytes += tx_bytes;
273 net->tx_stats.errors += tx_errors;
274 u64_stats_update_end(&net->tx_stats.syncp);
275
276 u64_stats_update_begin(&net->rx_stats.syncp);
277 net->rx_stats.pkts += rx_pkts;
278 net->rx_stats.bytes += rx_bytes;
279 net->rx_stats.drops += rx_drops;
280 net->rx_stats.errors += rx_errors;
281 net->rx_stats.overruns += rx_overruns;
282 u64_stats_update_end(&net->rx_stats.syncp);
283 }
284
vdpasim_net_get_stats(struct vdpasim * vdpasim,u16 idx,struct sk_buff * msg,struct netlink_ext_ack * extack)285 static int vdpasim_net_get_stats(struct vdpasim *vdpasim, u16 idx,
286 struct sk_buff *msg,
287 struct netlink_ext_ack *extack)
288 {
289 struct vdpasim_net *net = sim_to_net(vdpasim);
290 u64 rx_pkts, rx_bytes, rx_errors, rx_overruns, rx_drops;
291 u64 tx_pkts, tx_bytes, tx_errors, tx_drops;
292 u64 cq_requests, cq_successes, cq_errors;
293 unsigned int start;
294 int err = -EMSGSIZE;
295
296 switch(idx) {
297 case 0:
298 do {
299 start = u64_stats_fetch_begin(&net->rx_stats.syncp);
300 rx_pkts = net->rx_stats.pkts;
301 rx_bytes = net->rx_stats.bytes;
302 rx_errors = net->rx_stats.errors;
303 rx_overruns = net->rx_stats.overruns;
304 rx_drops = net->rx_stats.drops;
305 } while (u64_stats_fetch_retry(&net->rx_stats.syncp, start));
306
307 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
308 "rx packets"))
309 break;
310 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
311 rx_pkts, VDPA_ATTR_PAD))
312 break;
313 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
314 "rx bytes"))
315 break;
316 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
317 rx_bytes, VDPA_ATTR_PAD))
318 break;
319 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
320 "rx errors"))
321 break;
322 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
323 rx_errors, VDPA_ATTR_PAD))
324 break;
325 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
326 "rx overruns"))
327 break;
328 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
329 rx_overruns, VDPA_ATTR_PAD))
330 break;
331 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
332 "rx drops"))
333 break;
334 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
335 rx_drops, VDPA_ATTR_PAD))
336 break;
337 err = 0;
338 break;
339 case 1:
340 do {
341 start = u64_stats_fetch_begin(&net->tx_stats.syncp);
342 tx_pkts = net->tx_stats.pkts;
343 tx_bytes = net->tx_stats.bytes;
344 tx_errors = net->tx_stats.errors;
345 tx_drops = net->tx_stats.drops;
346 } while (u64_stats_fetch_retry(&net->tx_stats.syncp, start));
347
348 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
349 "tx packets"))
350 break;
351 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
352 tx_pkts, VDPA_ATTR_PAD))
353 break;
354 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
355 "tx bytes"))
356 break;
357 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
358 tx_bytes, VDPA_ATTR_PAD))
359 break;
360 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
361 "tx errors"))
362 break;
363 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
364 tx_errors, VDPA_ATTR_PAD))
365 break;
366 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
367 "tx drops"))
368 break;
369 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
370 tx_drops, VDPA_ATTR_PAD))
371 break;
372 err = 0;
373 break;
374 case 2:
375 do {
376 start = u64_stats_fetch_begin(&net->cq_stats.syncp);
377 cq_requests = net->cq_stats.requests;
378 cq_successes = net->cq_stats.successes;
379 cq_errors = net->cq_stats.errors;
380 } while (u64_stats_fetch_retry(&net->cq_stats.syncp, start));
381
382 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
383 "cvq requests"))
384 break;
385 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
386 cq_requests, VDPA_ATTR_PAD))
387 break;
388 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
389 "cvq successes"))
390 break;
391 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
392 cq_successes, VDPA_ATTR_PAD))
393 break;
394 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
395 "cvq errors"))
396 break;
397 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
398 cq_errors, VDPA_ATTR_PAD))
399 break;
400 err = 0;
401 break;
402 default:
403 err = -EINVAL;
404 break;
405 }
406
407 return err;
408 }
409
vdpasim_net_get_config(struct vdpasim * vdpasim,void * config)410 static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config)
411 {
412 struct virtio_net_config *net_config = config;
413
414 net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
415 }
416
vdpasim_net_setup_config(struct vdpasim * vdpasim,const struct vdpa_dev_set_config * config)417 static void vdpasim_net_setup_config(struct vdpasim *vdpasim,
418 const struct vdpa_dev_set_config *config)
419 {
420 struct virtio_net_config *vio_config = vdpasim->config;
421
422 if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR))
423 memcpy(vio_config->mac, config->net.mac, ETH_ALEN);
424 if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MTU))
425 vio_config->mtu = cpu_to_vdpasim16(vdpasim, config->net.mtu);
426 else
427 /* Setup default MTU to be 1500 */
428 vio_config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
429 }
430
vdpasim_net_free(struct vdpasim * vdpasim)431 static void vdpasim_net_free(struct vdpasim *vdpasim)
432 {
433 struct vdpasim_net *net = sim_to_net(vdpasim);
434
435 kvfree(net->buffer);
436 }
437
/*
 * Release callback of the management device. The device is a statically
 * allocated object in this file, so there is nothing to free here.
 */
static void vdpasim_net_mgmtdev_release(struct device *dev)
{
}
441
442 static struct device vdpasim_net_mgmtdev = {
443 .init_name = "vdpasim_net",
444 .release = vdpasim_net_mgmtdev_release,
445 };
446
vdpasim_net_dev_add(struct vdpa_mgmt_dev * mdev,const char * name,const struct vdpa_dev_set_config * config)447 static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
448 const struct vdpa_dev_set_config *config)
449 {
450 struct vdpasim_dev_attr dev_attr = {};
451 struct vdpasim_net *net;
452 struct vdpasim *simdev;
453 int ret;
454
455 dev_attr.mgmt_dev = mdev;
456 dev_attr.name = name;
457 dev_attr.id = VIRTIO_ID_NET;
458 dev_attr.supported_features = VDPASIM_NET_FEATURES;
459 dev_attr.nvqs = VDPASIM_NET_VQ_NUM;
460 dev_attr.ngroups = VDPASIM_NET_GROUP_NUM;
461 dev_attr.nas = VDPASIM_NET_AS_NUM;
462 dev_attr.alloc_size = sizeof(struct vdpasim_net);
463 dev_attr.config_size = sizeof(struct virtio_net_config);
464 dev_attr.get_config = vdpasim_net_get_config;
465 dev_attr.work_fn = vdpasim_net_work;
466 dev_attr.get_stats = vdpasim_net_get_stats;
467 dev_attr.free = vdpasim_net_free;
468
469 simdev = vdpasim_create(&dev_attr, config);
470 if (IS_ERR(simdev))
471 return PTR_ERR(simdev);
472
473 vdpasim_net_setup_config(simdev, config);
474
475 net = sim_to_net(simdev);
476
477 u64_stats_init(&net->tx_stats.syncp);
478 u64_stats_init(&net->rx_stats.syncp);
479 u64_stats_init(&net->cq_stats.syncp);
480
481 net->buffer = kvmalloc(PAGE_SIZE, GFP_KERNEL);
482 if (!net->buffer) {
483 ret = -ENOMEM;
484 goto reg_err;
485 }
486
487 /*
488 * Initialization must be completed before this call, since it can
489 * connect the device to the vDPA bus, so requests can arrive after
490 * this call.
491 */
492 ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM);
493 if (ret)
494 goto reg_err;
495
496 return 0;
497
498 reg_err:
499 put_device(&simdev->vdpa.dev);
500 return ret;
501 }
502
vdpasim_net_dev_del(struct vdpa_mgmt_dev * mdev,struct vdpa_device * dev)503 static void vdpasim_net_dev_del(struct vdpa_mgmt_dev *mdev,
504 struct vdpa_device *dev)
505 {
506 struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);
507
508 _vdpa_unregister_device(&simdev->vdpa);
509 }
510
511 static const struct vdpa_mgmtdev_ops vdpasim_net_mgmtdev_ops = {
512 .dev_add = vdpasim_net_dev_add,
513 .dev_del = vdpasim_net_dev_del
514 };
515
516 static struct virtio_device_id id_table[] = {
517 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
518 { 0 },
519 };
520
521 static struct vdpa_mgmt_dev mgmt_dev = {
522 .device = &vdpasim_net_mgmtdev,
523 .id_table = id_table,
524 .ops = &vdpasim_net_mgmtdev_ops,
525 .config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR |
526 1 << VDPA_ATTR_DEV_NET_CFG_MTU |
527 1 << VDPA_ATTR_DEV_FEATURES),
528 .max_supported_vqs = VDPASIM_NET_VQ_NUM,
529 .supported_features = VDPASIM_NET_FEATURES,
530 };
531
vdpasim_net_init(void)532 static int __init vdpasim_net_init(void)
533 {
534 int ret;
535
536 ret = device_register(&vdpasim_net_mgmtdev);
537 if (ret) {
538 put_device(&vdpasim_net_mgmtdev);
539 return ret;
540 }
541
542 ret = vdpa_mgmtdev_register(&mgmt_dev);
543 if (ret)
544 goto parent_err;
545 return 0;
546
547 parent_err:
548 device_unregister(&vdpasim_net_mgmtdev);
549 return ret;
550 }
551
vdpasim_net_exit(void)552 static void __exit vdpasim_net_exit(void)
553 {
554 vdpa_mgmtdev_unregister(&mgmt_dev);
555 device_unregister(&vdpasim_net_mgmtdev);
556 }
557
/* Module entry and exit points. */
module_init(vdpasim_net_init);
module_exit(vdpasim_net_exit);

/* Module metadata, taken from the DRV_* macros defined in this file. */
MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);
565