// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDPA simulator for networking device.
 *
 * Copyright (c) 2020, Red Hat Inc. All rights reserved.
 *     Author: Jason Wang <jasowang@redhat.com>
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/etherdevice.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <net/netlink.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/vdpa.h>

#include "vdpa_sim.h"

#define DRV_VERSION  "0.1"
#define DRV_AUTHOR   "Jason Wang <jasowang@redhat.com>"
#define DRV_DESC     "vDPA Device Simulator for networking device"
#define DRV_LICENSE  "GPL v2"

#define VDPASIM_NET_FEATURES	(VDPASIM_FEATURES | \
				 (1ULL << VIRTIO_NET_F_MAC) | \
				 (1ULL << VIRTIO_NET_F_STATUS) | \
				 (1ULL << VIRTIO_NET_F_MTU) | \
				 (1ULL << VIRTIO_NET_F_CTRL_VQ) | \
				 (1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR))

/* 3 virtqueues, 2 address spaces, 2 virtqueue groups */
#define VDPASIM_NET_VQ_NUM	3
#define VDPASIM_NET_AS_NUM	2
#define VDPASIM_NET_GROUP_NUM	2

struct vdpasim_dataq_stats {
	struct u64_stats_sync syncp;
	u64 pkts;
	u64 bytes;
	u64 drops;
	u64 errors;
	u64 overruns;
};

struct vdpasim_cq_stats {
	struct u64_stats_sync syncp;
	u64 requests;
	u64 successes;
	u64 errors;
};

struct vdpasim_net {
	struct vdpasim vdpasim;
	struct vdpasim_dataq_stats tx_stats;
	struct vdpasim_dataq_stats rx_stats;
	struct vdpasim_cq_stats cq_stats;
};

static struct vdpasim_net *sim_to_net(struct vdpasim *vdpasim)
{
	return container_of(vdpasim, struct vdpasim_net, vdpasim);
}

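/*
 * Mark the descriptor chain at @vq->head as used with @len bytes written,
 * then notify the driver if the ring requires it.
 */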
static void vdpasim_net_complete(struct vdpasim_virtqueue *vq, size_t len)
{
	/* Make sure data is written before advancing the index */
	smp_wmb();

	vringh_complete_iotlb(&vq->vring, vq->head, len);

	/* Make sure used is visible before raising the interrupt. */
	smp_wmb();

	local_bh_disable();
	if (vringh_need_notify_iotlb(&vq->vring) > 0)
		vringh_notify(&vq->vring);
	local_bh_enable();
}

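/*
 * Decide whether a looped-back TX buffer should be delivered to the RX queue:
 * accept broadcast/multicast frames and frames addressed to the device's own
 * MAC, drop everything else.
 */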
static bool receive_filter(struct vdpasim *vdpasim, size_t len)
{
	bool modern = vdpasim->features & (1ULL << VIRTIO_F_VERSION_1);
	size_t hdr_len = modern ? sizeof(struct virtio_net_hdr_v1) :
				  sizeof(struct virtio_net_hdr);
	struct virtio_net_config *vio_config = vdpasim->config;

	if (len < ETH_ALEN + hdr_len)
		return false;

	if (is_broadcast_ether_addr(vdpasim->buffer + hdr_len) ||
	    is_multicast_ether_addr(vdpasim->buffer + hdr_len))
		return true;
	if (ether_addr_equal(vdpasim->buffer + hdr_len, vio_config->mac))
		return true;

	return false;
}

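/* Handle VIRTIO_NET_CTRL_MAC commands; only MAC_ADDR_SET is supported. */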
static virtio_net_ctrl_ack vdpasim_handle_ctrl_mac(struct vdpasim *vdpasim,
						   u8 cmd)
{
	struct virtio_net_config *vio_config = vdpasim->config;
	struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2];
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	size_t read;

	switch (cmd) {
	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov,
					     vio_config->mac, ETH_ALEN);
		if (read == ETH_ALEN)
			status = VIRTIO_NET_OK;
		break;
	default:
		break;
	}

	return status;
}

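/*
 * Drain the control virtqueue (vq index 2): execute each request and push
 * the ack status back to the driver, accumulating cvq statistics.
 */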
static void vdpasim_handle_cvq(struct vdpasim *vdpasim)
{
	struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2];
	struct vdpasim_net *net = sim_to_net(vdpasim);
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct virtio_net_ctrl_hdr ctrl;
	size_t read, write;
	u64 requests = 0, errors = 0, successes = 0;
	int err;

	if (!(vdpasim->features & (1ULL << VIRTIO_NET_F_CTRL_VQ)))
		return;

	if (!cvq->ready)
		return;

	while (true) {
		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->in_iov,
					   &cvq->out_iov,
					   &cvq->head, GFP_ATOMIC);
		if (err <= 0)
			break;

		++requests;
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov, &ctrl,
					     sizeof(ctrl));
		if (read != sizeof(ctrl)) {
			++errors;
			break;
		}

		switch (ctrl.class) {
		case VIRTIO_NET_CTRL_MAC:
			status = vdpasim_handle_ctrl_mac(vdpasim, ctrl.cmd);
			break;
		default:
			break;
		}

		if (status == VIRTIO_NET_OK)
			++successes;
		else
			++errors;

		/* Make sure data is written before advancing the index */
		smp_wmb();

		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->out_iov,
					      &status, sizeof(status));
		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
		vringh_kiov_cleanup(&cvq->in_iov);
		vringh_kiov_cleanup(&cvq->out_iov);

		/* Make sure used is visible before raising the interrupt. */
		smp_wmb();

		local_bh_disable();
		if (cvq->cb)
			cvq->cb(cvq->private);
		local_bh_enable();
	}

	u64_stats_update_begin(&net->cq_stats.syncp);
	net->cq_stats.requests += requests;
	net->cq_stats.errors += errors;
	net->cq_stats.successes += successes;
	u64_stats_update_end(&net->cq_stats.syncp);
}

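/*
 * Datapath worker: the simulator loops buffers from the TX queue (vq 1) back
 * into the RX queue (vq 0), applying receive_filter() and updating the
 * per-queue statistics on the way.
 */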
static void vdpasim_net_work(struct work_struct *work)
{
	struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
	struct vdpasim_virtqueue *txq = &vdpasim->vqs[1];
	struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0];
	struct vdpasim_net *net = sim_to_net(vdpasim);
	ssize_t read, write;
	u64 tx_pkts = 0, rx_pkts = 0, tx_bytes = 0, rx_bytes = 0;
	u64 rx_drops = 0, rx_overruns = 0, rx_errors = 0, tx_errors = 0;
	int err;

	spin_lock(&vdpasim->lock);

	if (!vdpasim->running)
		goto out;

	if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto out;

	vdpasim_handle_cvq(vdpasim);

	if (!txq->ready || !rxq->ready)
		goto out;

	while (true) {
		err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL,
					   &txq->head, GFP_ATOMIC);
		if (err <= 0) {
			if (err)
				++tx_errors;
			break;
		}

		++tx_pkts;
		read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov,
					     vdpasim->buffer,
					     PAGE_SIZE);

		tx_bytes += read;

		if (!receive_filter(vdpasim, read)) {
			++rx_drops;
			vdpasim_net_complete(txq, 0);
			continue;
		}

		err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov,
					   &rxq->head, GFP_ATOMIC);
		if (err <= 0) {
			++rx_overruns;
			vdpasim_net_complete(txq, 0);
			break;
		}

		write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov,
					      vdpasim->buffer, read);
		if (write <= 0) {
			++rx_errors;
			break;
		}

		++rx_pkts;
		rx_bytes += write;

		vdpasim_net_complete(txq, 0);
		vdpasim_net_complete(rxq, write);

		/* Don't hog the CPU: requeue the work after a few packets. */
		if (tx_pkts > 4) {
			schedule_work(&vdpasim->work);
			goto out;
		}
	}

out:
	spin_unlock(&vdpasim->lock);

	u64_stats_update_begin(&net->tx_stats.syncp);
	net->tx_stats.pkts += tx_pkts;
	net->tx_stats.bytes += tx_bytes;
	net->tx_stats.errors += tx_errors;
	u64_stats_update_end(&net->tx_stats.syncp);

	u64_stats_update_begin(&net->rx_stats.syncp);
	net->rx_stats.pkts += rx_pkts;
	net->rx_stats.bytes += rx_bytes;
	net->rx_stats.drops += rx_drops;
	net->rx_stats.errors += rx_errors;
	net->rx_stats.overruns += rx_overruns;
	u64_stats_update_end(&net->rx_stats.syncp);
}

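/*
 * Report vendor statistics for the queue selected by @idx over netlink:
 * 0 = RX queue, 1 = TX queue, 2 = control queue.
 */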
static int vdpasim_net_get_stats(struct vdpasim *vdpasim, u16 idx,
				 struct sk_buff *msg,
				 struct netlink_ext_ack *extack)
{
	struct vdpasim_net *net = sim_to_net(vdpasim);
	u64 rx_pkts, rx_bytes, rx_errors, rx_overruns, rx_drops;
	u64 tx_pkts, tx_bytes, tx_errors, tx_drops;
	u64 cq_requests, cq_successes, cq_errors;
	unsigned int start;
	int err = -EMSGSIZE;

	switch (idx) {
	case 0:
		do {
			start = u64_stats_fetch_begin(&net->rx_stats.syncp);
			rx_pkts = net->rx_stats.pkts;
			rx_bytes = net->rx_stats.bytes;
			rx_errors = net->rx_stats.errors;
			rx_overruns = net->rx_stats.overruns;
			rx_drops = net->rx_stats.drops;
		} while (u64_stats_fetch_retry(&net->rx_stats.syncp, start));

		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "rx packets"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      rx_pkts, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "rx bytes"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      rx_bytes, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "rx errors"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      rx_errors, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "rx overruns"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      rx_overruns, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "rx drops"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      rx_drops, VDPA_ATTR_PAD))
			break;
		err = 0;
		break;
	case 1:
		do {
			start = u64_stats_fetch_begin(&net->tx_stats.syncp);
			tx_pkts = net->tx_stats.pkts;
			tx_bytes = net->tx_stats.bytes;
			tx_errors = net->tx_stats.errors;
			tx_drops = net->tx_stats.drops;
		} while (u64_stats_fetch_retry(&net->tx_stats.syncp, start));

		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "tx packets"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      tx_pkts, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "tx bytes"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      tx_bytes, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "tx errors"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      tx_errors, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "tx drops"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      tx_drops, VDPA_ATTR_PAD))
			break;
		err = 0;
		break;
	case 2:
		do {
			start = u64_stats_fetch_begin(&net->cq_stats.syncp);
			cq_requests = net->cq_stats.requests;
			cq_successes = net->cq_stats.successes;
			cq_errors = net->cq_stats.errors;
		} while (u64_stats_fetch_retry(&net->cq_stats.syncp, start));

		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "cvq requests"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      cq_requests, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "cvq successes"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      cq_successes, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "cvq errors"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      cq_errors, VDPA_ATTR_PAD))
			break;
		err = 0;
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}

static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config)
{
	struct virtio_net_config *net_config = config;

	net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
}

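/* Apply the MAC and MTU provided by the management interface, if any. */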
static void vdpasim_net_setup_config(struct vdpasim *vdpasim,
				     const struct vdpa_dev_set_config *config)
{
	struct virtio_net_config *vio_config = vdpasim->config;

	if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR))
		memcpy(vio_config->mac, config->net.mac, ETH_ALEN);
	if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MTU))
		vio_config->mtu = cpu_to_vdpasim16(vdpasim, config->net.mtu);
	else
		/* Set up the default MTU of 1500 */
		vio_config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
}

static void vdpasim_net_mgmtdev_release(struct device *dev)
{
}

static struct device vdpasim_net_mgmtdev = {
	.init_name = "vdpasim_net",
	.release = vdpasim_net_mgmtdev_release,
};

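/*
 * Management ops .dev_add callback: allocate and configure a simulator
 * instance, then register it on the vDPA bus.
 */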
static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			       const struct vdpa_dev_set_config *config)
{
	struct vdpasim_dev_attr dev_attr = {};
	struct vdpasim_net *net;
	struct vdpasim *simdev;
	int ret;

	dev_attr.mgmt_dev = mdev;
	dev_attr.name = name;
	dev_attr.id = VIRTIO_ID_NET;
	dev_attr.supported_features = VDPASIM_NET_FEATURES;
	dev_attr.nvqs = VDPASIM_NET_VQ_NUM;
	dev_attr.ngroups = VDPASIM_NET_GROUP_NUM;
	dev_attr.nas = VDPASIM_NET_AS_NUM;
	dev_attr.alloc_size = sizeof(struct vdpasim_net);
	dev_attr.config_size = sizeof(struct virtio_net_config);
	dev_attr.get_config = vdpasim_net_get_config;
	dev_attr.work_fn = vdpasim_net_work;
	dev_attr.get_stats = vdpasim_net_get_stats;
	dev_attr.buffer_size = PAGE_SIZE;

	simdev = vdpasim_create(&dev_attr, config);
	if (IS_ERR(simdev))
		return PTR_ERR(simdev);

	vdpasim_net_setup_config(simdev, config);

	net = sim_to_net(simdev);

	u64_stats_init(&net->tx_stats.syncp);
	u64_stats_init(&net->rx_stats.syncp);
	u64_stats_init(&net->cq_stats.syncp);

	/*
	 * Initialization must be completed before this call, since it can
	 * connect the device to the vDPA bus, so requests can arrive after
	 * this call.
	 */
	ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM);
	if (ret)
		goto reg_err;

	return 0;

reg_err:
	put_device(&simdev->vdpa.dev);
	return ret;
}

static void vdpasim_net_dev_del(struct vdpa_mgmt_dev *mdev,
				struct vdpa_device *dev)
{
	struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);

	_vdpa_unregister_device(&simdev->vdpa);
}

static const struct vdpa_mgmtdev_ops vdpasim_net_mgmtdev_ops = {
	.dev_add = vdpasim_net_dev_add,
	.dev_del = vdpasim_net_dev_del
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static struct vdpa_mgmt_dev mgmt_dev = {
	.device = &vdpasim_net_mgmtdev,
	.id_table = id_table,
	.ops = &vdpasim_net_mgmtdev_ops,
	.config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR |
			     1 << VDPA_ATTR_DEV_NET_CFG_MTU |
			     1 << VDPA_ATTR_DEV_FEATURES),
	.max_supported_vqs = VDPASIM_NET_VQ_NUM,
	.supported_features = VDPASIM_NET_FEATURES,
};

static int __init vdpasim_net_init(void)
{
	int ret;

	ret = device_register(&vdpasim_net_mgmtdev);
	if (ret) {
		put_device(&vdpasim_net_mgmtdev);
		return ret;
	}

	ret = vdpa_mgmtdev_register(&mgmt_dev);
	if (ret)
		goto parent_err;
	return 0;

parent_err:
	device_unregister(&vdpasim_net_mgmtdev);
	return ret;
}

static void __exit vdpasim_net_exit(void)
{
	vdpa_mgmtdev_unregister(&mgmt_dev);
	device_unregister(&vdpasim_net_mgmtdev);
}

module_init(vdpasim_net_init);
module_exit(vdpasim_net_exit);

MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);