1 /*
2 * Virtio Network Device
3 *
4 * Copyright IBM, Corp. 2007
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
12 */
13
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/log.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/module.h"
20 #include "hw/virtio/virtio.h"
21 #include "net/net.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
24 #include "qemu/error-report.h"
25 #include "qemu/timer.h"
26 #include "qemu/option.h"
27 #include "qemu/option_int.h"
28 #include "qemu/config-file.h"
29 #include "qapi/qmp/qdict.h"
30 #include "hw/virtio/virtio-net.h"
31 #include "net/vhost_net.h"
32 #include "net/announce.h"
33 #include "hw/virtio/virtio-bus.h"
34 #include "qapi/error.h"
35 #include "qapi/qapi-events-net.h"
36 #include "hw/qdev-properties.h"
37 #include "qapi/qapi-types-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "hw/virtio/virtio-access.h"
40 #include "migration/misc.h"
41 #include "standard-headers/linux/ethtool.h"
42 #include "sysemu/sysemu.h"
43 #include "trace.h"
44 #include "monitor/qdev.h"
45 #include "hw/pci/pci_device.h"
46 #include "net_rx_pkt.h"
47 #include "hw/virtio/vhost.h"
48 #include "sysemu/qtest.h"
49
/* vmstate version for migration of this device */
#define VIRTIO_NET_VM_VERSION 11

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */

/* mask/field used when parsing TCP headers for receive coalescing */
#define VIRTIO_NET_TCP_FLAG 0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH 0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval, This value affects the performance
   a lot, and should be tuned carefully, '300000'(300us) is the recommended
   value to pass the WHQL test, '50000' can gain 2x netperf throughput with
   tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* all hash types this device can report via VIRTIO_NET_F_RSS/HASH_REPORT */
#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
90
/*
 * Map of feature bits to the end offset of the config-space fields they
 * make valid; used to size the visible config space per negotiated features.
 */
static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

/* Bounds for the computed config size: at least through 'mac', at most all */
static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};
112
virtio_net_get_subqueue(NetClientState * nc)113 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
114 {
115 VirtIONet *n = qemu_get_nic_opaque(nc);
116
117 return &n->vqs[nc->queue_index];
118 }
119
/* Map a virtqueue index to its queue-pair index (each pair has RX and TX). */
static int vq2q(int queue_index)
{
    int pair = queue_index / 2;

    return pair;
}
124
/*
 * Drain packets queued toward this subqueue's peer (flushing deliverable
 * ones, purging the rest), then assert no async TX element remains.
 */
static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        return;
    }

    /* second argument requests purge of packets that cannot be flushed */
    qemu_flush_or_purge_queued_packets(nc->peer, true);
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}
134
135 /* TODO
136 * - we could suppress RX interrupt if we were so inclined.
137 */
138
/*
 * Fill @config with the device's virtio-net config space using the
 * device's byte order. For a vhost-vdpa peer, the backend-reported config
 * is preferred (with a zero-MAC fixup and ANNOUNCE status merged back in).
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0 , sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    /* advertise a real indirection table length only when RSS is offered */
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret == -1) {
            /* backend query failed: keep the locally built config above */
            return;
        }

        /*
         * Some NIC/kernel combinations present 0 as the mac address. As that
         * is not a legal address, try to proceed with the address from the
         * QEMU command line in the hope that the address has been configured
         * correctly elsewhere - just not reported by the device.
         */
        if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
            info_report("Zero hardware mac address detected. Ignoring.");
            memcpy(netcfg.mac, n->mac, ETH_ALEN);
        }

        /* merge emulated ANNOUNCE bit into the backend-reported status */
        netcfg.status |= virtio_tswap16(vdev,
                                        n->status & VIRTIO_NET_S_ANNOUNCE);
        memcpy(config, &netcfg, n->config_size);
    }
}
189
/*
 * Guest write to config space: accept a new MAC (only on legacy devices
 * without CTRL_MAC_ADDR / VERSION_1) and forward the whole config to a
 * vhost-vdpa backend if present.
 */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    /* config-space MAC writes are legal only on pre-virtio-1 devices */
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
}
215
virtio_net_started(VirtIONet * n,uint8_t status)216 static bool virtio_net_started(VirtIONet *n, uint8_t status)
217 {
218 VirtIODevice *vdev = VIRTIO_DEVICE(n);
219 return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
220 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
221 }
222
/* Raise the ANNOUNCE status bit and signal the guest via a config change. */
static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);

    trace_virtio_net_announce_notify();
    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}
231
virtio_net_announce_timer(void * opaque)232 static void virtio_net_announce_timer(void *opaque)
233 {
234 VirtIONet *n = opaque;
235 trace_virtio_net_announce_timer(n->announce_timer.round);
236
237 n->announce_timer.round--;
238 virtio_net_announce_notify(n);
239 }
240
/*
 * External announce request (e.g. after migration): notify the guest,
 * unless the migration announce timer is already mid-sequence.
 */
static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running
     * If it is, let it trigger announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    /* guest must have negotiated both GUEST_ANNOUNCE and CTRL_VQ */
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            virtio_net_announce_notify(n);
    }
}
260
/*
 * Start or stop the vhost backend to track the desired device state.
 * No-op when there is no vhost backend or when the backend is already
 * in the wanted started/stopped state.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    /* number of control virtqueues (ncs beyond the data queue pairs) */
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    /* already in the desired state? */
    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        /* vhost can't byte-swap vnet headers; stay on userspace virtio */
        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        /* set before starting so the flag is visible during startup */
        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}
320
/*
 * Tell one peer which endianness to use for vnet headers, matching the
 * device's byte order. Returns the backend's result code.
 */
static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    return virtio_is_big_endian(vdev) ? qemu_set_vnet_be(peer, enable)
                                      : qemu_set_vnet_le(peer, enable);
}
331
/*
 * Apply the vnet-header endianness setting to all queue pairs.
 * Returns true if enabling failed on some peer (after rolling back the
 * peers already configured), false on success or when disabling.
 */
static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            /* undo the peers configured so far, then report failure */
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}
350
/*
 * Track device start/stop transitions and program the backend's vnet
 * header endianness accordingly.
 */
static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}
373
/* Drop all pending TX elements; notify the guest if any were dropped. */
static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    if (virtqueue_drop_all(vq) > 0) {
        virtio_notify(vdev, vq);
    }
}
381
/*
 * Device status change handler: update endianness and vhost state, then
 * walk every queue pair to flush pending RX, (re)arm or cancel the TX
 * timer/bottom-half, and drop stale TX data when the link is down.
 */
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        /* queues beyond the current set (or >0 without multiqueue) are off */
        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            /* resume deferred TX via timer or bottom half, whichever is set */
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we are likely have some packets in tx queue
                 * and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}
438
/*
 * Peer link state changed: mirror it into the virtio LINK_UP status bit,
 * notify the guest on change, and re-evaluate the overall device status.
 *
 * Fix: QEMU coding style requires braces on all control-flow bodies; the
 * original if/else statements were unbraced.
 */
static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    /* only interrupt the guest when the bit actually changed */
    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}
455
/*
 * Emit a one-shot NIC_RX_FILTER_CHANGED QMP event; further events are
 * suppressed until management re-arms them by querying the filter.
 */
static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    char *path;

    if (!nc->rxfilter_notify_enabled) {
        return;
    }

    path = object_get_canonical_path(OBJECT(n->qdev));
    qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
    g_free(path);

    /* disable event notification to avoid events flooding */
    nc->rxfilter_notify_enabled = 0;
}
469
get_vlan_table(VirtIONet * n)470 static intList *get_vlan_table(VirtIONet *n)
471 {
472 intList *list;
473 int i, j;
474
475 list = NULL;
476 for (i = 0; i < MAX_VLAN >> 5; i++) {
477 for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
478 if (n->vlans[i] & (1U << j)) {
479 QAPI_LIST_PREPEND(list, (i << 5) + j);
480 }
481 }
482 }
483
484 return list;
485 }
486
/*
 * Build the RxFilterInfo QMP reply describing the current receive filter
 * (promisc/uni/multi modes, MAC tables, VLAN table) and re-arm the
 * rx-filter-changed event notification.
 */
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /*
     * NOTE(review): assigns nobcast ("no broadcast") directly to
     * broadcast_allowed, which reads inverted — confirm intended semantics
     * before changing, as management tools may rely on current behavior.
     */
    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    /* entries [0, first_multi) are unicast addresses */
    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    /* entries [first_multi, in_use) are multicast addresses */
    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}
549
/*
 * Per-virtqueue reset: stop the matching vhost virtqueue (tap backends
 * only) and drop any packets queued toward this subqueue.
 */
static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}
573
/*
 * Per-virtqueue enable after reset: restart the matching vhost virtqueue
 * for tap backends when vhost is already running.
 *
 * Fix: queue_index is uint32_t, so the error_report format must use
 * PRIu32 rather than "%d" (signed int mismatch flagged by -Wformat).
 */
static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int r;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
        if (r < 0) {
            error_report("unable to restart vhost net virtqueue: %" PRIu32 ", "
                         "when resetting the queue", queue_index);
        }
    }
}
600
/*
 * Full device reset: restore rx-mode defaults, cancel announces, clear
 * MAC/VLAN filter state, restore the configured MAC and drop queued TX.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    /* restore the MAC from the NIC configuration (command line) */
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queue_pairs; i++) {
        flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
    }
}
634
/* Cache whether the peer backend supports vnet headers (no peer: no-op). */
static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);

    if (nc->peer) {
        n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
    }
}
644
/* Return the cached vnet-header capability (set by peer_test_vnet_hdr). */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}
649
/*
 * Query and cache whether the peer supports UFO (UDP fragmentation
 * offload); always 0 without vnet header support.
 *
 * Fix: QEMU coding style requires braces on all control-flow bodies; the
 * original guard 'if' was unbraced.
 */
static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}
659
/* Whether the peer supports USO (UDP segmentation offload); needs vnet hdr. */
static int peer_has_uso(VirtIONet *n)
{
    NetClientState *nc;

    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    nc = qemu_get_queue(n->nic);
    return qemu_has_uso(nc->peer);
}
668
/*
 * Choose the guest-visible vnet header length from the negotiated
 * features and propagate it to every peer that supports that length
 * (in which case host and guest header lengths match).
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    /*
     * Note: when extending the vnet header, please make sure to
     * change the vnet header copying logic in virtio_net_flush_tx()
     * as well.
     */
    if (version_1) {
        /* virtio-1: header has the mrg_rxbuf layout, larger with hash */
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        /* legacy: header size depends on mergeable RX buffers */
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
703
/*
 * Maximum TX queue size the device may advertise: only vhost-user and
 * vhost-vdpa backends support sizes above the historical default.
 *
 * Fixes: stray ';' after the switch's closing brace (empty statement) and
 * missing space after the 'switch' keyword, per QEMU coding style.
 */
static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user or vhost-vdpa don't support max queue
     * size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    switch (peer->info->type) {
    case NET_CLIENT_DRIVER_VHOST_USER:
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        return VIRTQUEUE_MAX_SIZE;
    default:
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }
}
724
/*
 * Enable subqueue @index on the peer: turn on the vring for vhost-user
 * backends, and enable the tap queue for multiqueue tap backends.
 * Returns 0 on success or when nothing needs doing.
 */
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    /* single-queue tap needs no explicit per-queue enable */
    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}
747
/*
 * Disable subqueue @index on the peer: mirror of peer_attach — turn off
 * the vring for vhost-user, disable the tap queue for tap backends.
 * Returns 0 on success or when nothing needs doing.
 */
static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}
766
/*
 * Sync the peer with the current queue-pair count: attach queues below
 * curr_queue_pairs, detach the rest. Skipped when the peer is gone.
 */
static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        int r = (i < n->curr_queue_pairs) ? peer_attach(n, i)
                                          : peer_detach(n, i);
        assert(!r);
    }
}
786
787 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
788
/*
 * Compute the feature set offered to the guest: start from the host
 * features, strip anything the peer backend cannot support (vnet header
 * based offloads, UFO/USO, RSS), and let a vhost backend mask further.
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    /* without vnet headers no checksum/segmentation offloads are possible */
    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!peer_has_uso(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
    }

    /* no vhost backend: the emulated data path supports everything left */
    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    /* RSS needs the eBPF steering program when vhost is in use */
    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * Since GUEST_ANNOUNCE is emulated the feature bit could be set without
     * enabled. This happens in the vDPA case.
     *
     * Make sure the feature set is not incoherent, as the driver could refuse
     * to start.
     *
     * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
     * helping guest to notify the new location with vDPA devices that does not
     * support it.
     */
    if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
    }

    return features;
}
861
/*
 * Features buggy pre-virtio guests assumed without negotiation.
 */
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    /* Linux kernel 2.6.25. It understood MAC (as everyone must),
     * but also these: */
    const uint64_t legacy_assumed =
        (1ULL << VIRTIO_NET_F_MAC) |
        (1ULL << VIRTIO_NET_F_CSUM) |
        (1ULL << VIRTIO_NET_F_HOST_TSO4) |
        (1ULL << VIRTIO_NET_F_HOST_TSO6) |
        (1ULL << VIRTIO_NET_F_HOST_ECN);

    return legacy_assumed;
}
876
/*
 * Push the currently negotiated guest RX offloads (checksum, TSO4/6,
 * ECN, UFO, USO4/6) down to the peer backend.
 */
static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
}
888
virtio_net_guest_offloads_by_features(uint64_t features)889 static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
890 {
891 static const uint64_t guest_offloads_mask =
892 (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
893 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
894 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
895 (1ULL << VIRTIO_NET_F_GUEST_ECN) |
896 (1ULL << VIRTIO_NET_F_GUEST_UFO) |
897 (1ULL << VIRTIO_NET_F_GUEST_USO4) |
898 (1ULL << VIRTIO_NET_F_GUEST_USO6);
899
900 return guest_offloads_mask & features;
901 }
902
/* Guest offloads permitted by the features the guest actually accepted. */
uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
908
/* Result holder for the qbus walk that locates the failover primary. */
typedef struct {
    VirtIONet *n;      /* the virtio-net device acting as failover standby */
    DeviceState *dev;  /* matching primary found by the walk, or NULL */
} FailoverDevice;
913
914 /**
915 * Set the failover primary device
916 *
917 * @opaque: FailoverId to setup
918 * @opts: opts for device we are handling
919 * @errp: returns an error if this function fails
920 */
failover_set_primary(DeviceState * dev,void * opaque)921 static int failover_set_primary(DeviceState *dev, void *opaque)
922 {
923 FailoverDevice *fdev = opaque;
924 PCIDevice *pci_dev = (PCIDevice *)
925 object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
926
927 if (!pci_dev) {
928 return 0;
929 }
930
931 if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
932 fdev->dev = dev;
933 return 1;
934 }
935
936 return 0;
937 }
938
939 /**
940 * Find the primary device for this failover virtio-net
941 *
942 * @n: VirtIONet device
943 * @errp: returns an error if this function fails
944 */
failover_find_primary_device(VirtIONet * n)945 static DeviceState *failover_find_primary_device(VirtIONet *n)
946 {
947 FailoverDevice fdev = {
948 .n = n,
949 };
950
951 qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
952 NULL, NULL, &fdev);
953 return fdev.dev;
954 }
955
/*
 * (Re-)create the failover primary device from the stashed options.
 * No-op when a primary already exists; an error is reported when no
 * options were stashed at all.
 */
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        /* primary already present, nothing to add */
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        /* creation failed: drop the stashed options, they are unusable */
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        /* drop our extra ref; the device is kept alive by its parent bus */
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}
984
/*
 * Feature negotiation completed: reconfigure multiqueue, vnet header
 * layout, RSC/RSS state and guest offloads, propagate acked features to
 * any vhost backend, and handle VLAN-filter and failover side effects.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    /* mtu_bypass_backend lets MTU stay offered even if backend lacks it */
    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    /* receive-segment coalescing needs both RSC_EXT and the TSO feature */
    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);

        /*
         * keep acked_features in NetVhostUserState up-to-date so it
         * can't miss any features configured by guest virtio driver.
         */
        vhost_net_save_acked_features(nc->peer);
    }

    /* without CTRL_VLAN, all VLANs pass: mark every filter bit set */
    if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}
1052
/*
 * Handle a VIRTIO_NET_CTRL_RX command: toggle one of the receive-mode
 * flags (promisc/allmulti/alluni/nomulti/nouni/nobcast) on or off.
 * Returns VIRTIO_NET_OK or VIRTIO_NET_ERR for the control-queue status.
 */
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t enable;
    NetClientState *nc = qemu_get_queue(n->nic);

    /* The payload is a single on/off byte. */
    if (iov_to_buf(iov, iov_cnt, 0, &enable, sizeof(enable)) !=
        sizeof(enable)) {
        return VIRTIO_NET_ERR;
    }

    switch (cmd) {
    case VIRTIO_NET_CTRL_RX_PROMISC:
        n->promisc = enable;
        break;
    case VIRTIO_NET_CTRL_RX_ALLMULTI:
        n->allmulti = enable;
        break;
    case VIRTIO_NET_CTRL_RX_ALLUNI:
        n->alluni = enable;
        break;
    case VIRTIO_NET_CTRL_RX_NOMULTI:
        n->nomulti = enable;
        break;
    case VIRTIO_NET_CTRL_RX_NOUNI:
        n->nouni = enable;
        break;
    case VIRTIO_NET_CTRL_RX_NOBCAST:
        n->nobcast = enable;
        break;
    default:
        return VIRTIO_NET_ERR;
    }

    /* Tell management that the rx filter configuration changed. */
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}
1085
/*
 * Handle VIRTIO_NET_CTRL_GUEST_OFFLOADS: the guest dynamically changes
 * which receive offloads (csum, TSO, ...) it can accept.  Only the SET
 * sub-command is defined; anything else is rejected.
 */
static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    uint64_t supported;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    /* Payload is a single little/guest-endian 64-bit offload bitmap. */
    if (iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads)) !=
        sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        return VIRTIO_NET_ERR;
    }

    offloads = virtio_ldq_p(vdev, &offloads);

    /* Without a vnet header there is no way to apply offloads. */
    if (!n->has_vnet_hdr) {
        return VIRTIO_NET_ERR;
    }

    /* RSC is carried in the same bitmap but handled by the device model,
     * not the backend: record it, then strip it before validation. */
    n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
                      virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
                      virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
    virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

    supported = virtio_net_supported_guest_offloads(n);
    if (offloads & ~supported) {
        return VIRTIO_NET_ERR;
    }

    n->curr_guest_offloads = offloads;
    virtio_net_apply_guest_offloads(n);

    return VIRTIO_NET_OK;
}
1130
/*
 * Handle VIRTIO_NET_CTRL_MAC commands:
 *  - ADDR_SET: replace the device MAC address (payload: 6 bytes).
 *  - TABLE_SET: replace the MAC filter table.  Payload is two
 *    virtio_net_ctrl_mac structures back to back: first the unicast
 *    table, then the multicast table; each is a 32-bit entry count
 *    followed by that many 6-byte addresses.
 * If either table is larger than MAC_TABLE_ENTRIES, the corresponding
 * overflow flag is set instead, which makes receive_filter() accept all
 * addresses of that class (fail open rather than drop traffic).
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    /* Staging buffer: the live table is only replaced on full success. */
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* --- unicast table: entry count, then the addresses --- */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    /* The claimed entry count must fit inside the remaining payload. */
    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        /* Too many unicast entries: don't store them, accept all unicast. */
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    /* Multicast entries are appended after the unicast ones. */
    first_multi = in_use;

    /* --- multicast table: entry count, then the addresses --- */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    /* The multicast table must consume exactly the rest of the payload. */
    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        /* Too many multicast entries: accept all multicast instead. */
        multi_overflow = 1;
    }

    /* Commit the staged table atomically with respect to the filter state. */
    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
1226
/*
 * Handle VIRTIO_NET_CTRL_VLAN: add or remove one VLAN id (16-bit payload,
 * guest endian) in the device's VLAN filter bitmap.
 */
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid)) != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }
    vid = virtio_lduw_p(vdev, &vid);

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    /* vlans[] is a bitmap: word index = vid / 32, bit = vid % 32. */
    switch (cmd) {
    case VIRTIO_NET_CTRL_VLAN_ADD:
        n->vlans[vid >> 5] |= 1U << (vid & 0x1f);
        break;
    case VIRTIO_NET_CTRL_VLAN_DEL:
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
        break;
    default:
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}
1255
/*
 * Handle VIRTIO_NET_CTRL_ANNOUNCE_ACK: the guest acknowledges our
 * self-announcement request, so clear the status bit and advance the
 * announce timer if more rounds remain.
 */
static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);

    /* Only ACK is valid, and only while an announcement is pending. */
    if (cmd != VIRTIO_NET_CTRL_ANNOUNCE_ACK ||
        !(n->status & VIRTIO_NET_S_ANNOUNCE)) {
        return VIRTIO_NET_ERR;
    }

    n->status &= ~VIRTIO_NET_S_ANNOUNCE;
    if (n->announce_timer.round) {
        qemu_announce_timer_step(&n->announce_timer);
    }
    return VIRTIO_NET_OK;
}
1271
1272 static void virtio_net_detach_epbf_rss(VirtIONet *n);
1273
/* Turn RSS off and detach any steering eBPF program from the backend. */
static void virtio_net_disable_rss(VirtIONet *n)
{
    bool was_enabled = n->rss_data.enabled;

    /* Trace only an actual enabled -> disabled transition. */
    if (was_enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;

    virtio_net_detach_epbf_rss(n);
}
1283
virtio_net_attach_ebpf_to_backend(NICState * nic,int prog_fd)1284 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1285 {
1286 NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1287 if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1288 return false;
1289 }
1290
1291 return nc->info->set_steering_ebpf(nc, prog_fd);
1292 }
1293
rss_data_to_rss_config(struct VirtioNetRssData * data,struct EBPFRSSConfig * config)1294 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1295 struct EBPFRSSConfig *config)
1296 {
1297 config->redirect = data->redirect;
1298 config->populate_hash = data->populate_hash;
1299 config->hash_types = data->hash_types;
1300 config->indirections_len = data->indirections_len;
1301 config->default_queue = data->default_queue;
1302 }
1303
virtio_net_attach_epbf_rss(VirtIONet * n)1304 static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1305 {
1306 struct EBPFRSSConfig config = {};
1307
1308 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1309 return false;
1310 }
1311
1312 rss_data_to_rss_config(&n->rss_data, &config);
1313
1314 if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1315 n->rss_data.indirections_table, n->rss_data.key)) {
1316 return false;
1317 }
1318
1319 if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1320 return false;
1321 }
1322
1323 return true;
1324 }
1325
/* Detach any steering eBPF program: prog_fd == -1 means "detach". */
static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}
1330
virtio_net_load_ebpf(VirtIONet * n)1331 static bool virtio_net_load_ebpf(VirtIONet *n)
1332 {
1333 if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1334 /* backend doesn't support steering ebpf */
1335 return false;
1336 }
1337
1338 return ebpf_rss_load(&n->ebpf_rss);
1339 }
1340
/* Detach the steering program from the backend first, then free the
 * eBPF objects themselves. */
static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}
1346
virtio_net_handle_rss(VirtIONet * n,struct iovec * iov,unsigned int iov_cnt,bool do_rss)1347 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1348 struct iovec *iov,
1349 unsigned int iov_cnt,
1350 bool do_rss)
1351 {
1352 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1353 struct virtio_net_rss_config cfg;
1354 size_t s, offset = 0, size_get;
1355 uint16_t queue_pairs, i;
1356 struct {
1357 uint16_t us;
1358 uint8_t b;
1359 } QEMU_PACKED temp;
1360 const char *err_msg = "";
1361 uint32_t err_value = 0;
1362
1363 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1364 err_msg = "RSS is not negotiated";
1365 goto error;
1366 }
1367 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1368 err_msg = "Hash report is not negotiated";
1369 goto error;
1370 }
1371 size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1372 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1373 if (s != size_get) {
1374 err_msg = "Short command buffer";
1375 err_value = (uint32_t)s;
1376 goto error;
1377 }
1378 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1379 n->rss_data.indirections_len =
1380 virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1381 n->rss_data.indirections_len++;
1382 if (!do_rss) {
1383 n->rss_data.indirections_len = 1;
1384 }
1385 if (!is_power_of_2(n->rss_data.indirections_len)) {
1386 err_msg = "Invalid size of indirection table";
1387 err_value = n->rss_data.indirections_len;
1388 goto error;
1389 }
1390 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1391 err_msg = "Too large indirection table";
1392 err_value = n->rss_data.indirections_len;
1393 goto error;
1394 }
1395 n->rss_data.default_queue = do_rss ?
1396 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1397 if (n->rss_data.default_queue >= n->max_queue_pairs) {
1398 err_msg = "Invalid default queue";
1399 err_value = n->rss_data.default_queue;
1400 goto error;
1401 }
1402 offset += size_get;
1403 size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1404 g_free(n->rss_data.indirections_table);
1405 n->rss_data.indirections_table = g_malloc(size_get);
1406 if (!n->rss_data.indirections_table) {
1407 err_msg = "Can't allocate indirections table";
1408 err_value = n->rss_data.indirections_len;
1409 goto error;
1410 }
1411 s = iov_to_buf(iov, iov_cnt, offset,
1412 n->rss_data.indirections_table, size_get);
1413 if (s != size_get) {
1414 err_msg = "Short indirection table buffer";
1415 err_value = (uint32_t)s;
1416 goto error;
1417 }
1418 for (i = 0; i < n->rss_data.indirections_len; ++i) {
1419 uint16_t val = n->rss_data.indirections_table[i];
1420 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1421 }
1422 offset += size_get;
1423 size_get = sizeof(temp);
1424 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1425 if (s != size_get) {
1426 err_msg = "Can't get queue_pairs";
1427 err_value = (uint32_t)s;
1428 goto error;
1429 }
1430 queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1431 if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1432 err_msg = "Invalid number of queue_pairs";
1433 err_value = queue_pairs;
1434 goto error;
1435 }
1436 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1437 err_msg = "Invalid key size";
1438 err_value = temp.b;
1439 goto error;
1440 }
1441 if (!temp.b && n->rss_data.hash_types) {
1442 err_msg = "No key provided";
1443 err_value = 0;
1444 goto error;
1445 }
1446 if (!temp.b && !n->rss_data.hash_types) {
1447 virtio_net_disable_rss(n);
1448 return queue_pairs;
1449 }
1450 offset += size_get;
1451 size_get = temp.b;
1452 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1453 if (s != size_get) {
1454 err_msg = "Can get key buffer";
1455 err_value = (uint32_t)s;
1456 goto error;
1457 }
1458 n->rss_data.enabled = true;
1459
1460 if (!n->rss_data.populate_hash) {
1461 if (!virtio_net_attach_epbf_rss(n)) {
1462 /* EBPF must be loaded for vhost */
1463 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1464 warn_report("Can't load eBPF RSS for vhost");
1465 goto error;
1466 }
1467 /* fallback to software RSS */
1468 warn_report("Can't load eBPF RSS - fallback to software RSS");
1469 n->rss_data.enabled_software_rss = true;
1470 }
1471 } else {
1472 /* use software RSS for hash populating */
1473 /* and detach eBPF if was loaded before */
1474 virtio_net_detach_epbf_rss(n);
1475 n->rss_data.enabled_software_rss = true;
1476 }
1477
1478 trace_virtio_net_rss_enable(n->rss_data.hash_types,
1479 n->rss_data.indirections_len,
1480 temp.b);
1481 return queue_pairs;
1482 error:
1483 trace_virtio_net_rss_error(err_msg, err_value);
1484 virtio_net_disable_rss(n);
1485 return 0;
1486 }
1487
/*
 * Handle VIRTIO_NET_CTRL_MQ: configure the active number of queue pairs,
 * either directly (VQ_PAIRS_SET), via RSS config (RSS_CONFIG) or via
 * hash-report config (HASH_CONFIG).  Any of these first disables the
 * current RSS state; the RSS/hash paths re-enable it as a side effect.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        /* Hash report only: queue count is unchanged, so return early. */
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    /* Validate against both the spec limits and this device's maximum.
     * (queue_pairs == 0 also covers the RSS-config failure path above.) */
    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* stop the backend before changing the number of queue_pairs to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}
1540
/*
 * Process one control-queue request described as raw iovecs.
 *
 * out_sg/out_num: guest->device data, a virtio_net_ctrl_hdr followed by
 * the command payload.  in_sg/in_num: device->guest data, receives the
 * one-byte ack status.  Returns the number of bytes written to in_sg
 * (sizeof(status)), or 0 if the buffers were malformed, in which case
 * the device has been marked broken via virtio_error().
 *
 * Exposed non-static so vhost-user/vdpa shadow control queues can reuse it.
 */
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    /* Work on a copy: iov_discard_front() modifies the iovec array, and
     * the caller's out_sg is const.  iov2 keeps the pointer for g_free(). */
    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    /* Always report a status byte back to the guest. */
    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}
1583
/*
 * Control virtqueue handler: pop each request, process it and push back
 * the status byte.  On a malformed request (handler returned 0) the
 * element is returned to the queue untouched and processing stops.
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement))) != NULL) {
        size_t written = virtio_net_handle_ctrl_iov(vdev,
                                                    elem->in_sg, elem->in_num,
                                                    elem->out_sg,
                                                    elem->out_num);
        if (written == 0) {
            /* Device was marked broken; give the element back and bail. */
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        virtqueue_push(vq, elem, written);
        virtio_notify(vdev, vq);
        g_free(elem);
    }
}
1608
1609 /* RX */
1610
/* RX virtqueue kick: the guest posted new receive buffers, so retry any
 * packets that were queued while the ring was full. */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int idx = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, idx));
}
1618
virtio_net_can_receive(NetClientState * nc)1619 static bool virtio_net_can_receive(NetClientState *nc)
1620 {
1621 VirtIONet *n = qemu_get_nic_opaque(nc);
1622 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1623 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1624
1625 if (!vdev->vm_running) {
1626 return false;
1627 }
1628
1629 if (nc->queue_index >= n->curr_queue_pairs) {
1630 return false;
1631 }
1632
1633 if (!virtio_queue_ready(q->rx_vq) ||
1634 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1635 return false;
1636 }
1637
1638 return true;
1639 }
1640
/*
 * Check whether the rx virtqueue can hold a packet of 'bufsize' bytes.
 * Returns 1 and disables further queue notifications when buffers are
 * available; returns 0 (with notifications enabled, so we hear about
 * new buffers) when they are not.  With mergeable rx buffers the check
 * is byte-based; otherwise a non-empty queue is assumed sufficient.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
1663
/* Byte-swap the multi-byte fields of a virtio-net header between host
 * and the device's (guest-visible) endianness. */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
1671
1672 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1673 * it never finds out that the packets don't have valid checksums. This
1674 * causes dhclient to get upset. Fedora's carried a patch for ages to
1675 * fix this with Xen but it hasn't appeared in an upstream release of
1676 * dhclient yet.
1677 *
1678 * To avoid breaking existing guests, we catch udp packets and add
1679 * checksums. This is terrible but it's better than hacking the guest
1680 * kernels.
1681 *
1682 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1683 * we should provide a mechanism to disable it to avoid polluting the host
1684 * cache.
1685 */
/*
 * See the comment block above: compute the UDP checksum for DHCP replies
 * that would otherwise reach the guest with an unfinished checksum.
 * Byte offsets assume an untagged Ethernet frame with a 20-byte IPv4
 * header (NOTE(review): buf[34] is the UDP source port only under that
 * assumption — confirmed adequate here because only port 67 is matched).
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        /* Checksum is now complete: clear the flag so the guest trusts it. */
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
1698
/*
 * Write the virtio-net header for one received packet into the guest
 * buffers described by iov.  If the backend supplies a vnet header it is
 * fixed up in place (dhclient workaround, endian swap) and copied;
 * otherwise an empty header is synthesized.
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (!n->has_vnet_hdr) {
        /* Backend delivered a bare frame: fabricate a no-offload header. */
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof(hdr));
        return;
    }

    /* FIXME this cast is evil */
    void *wbuf = (void *)buf;
    work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                size - n->host_hdr_len);

    if (n->needs_vnet_hdr_swap) {
        virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
    }
    iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
}
1720
/*
 * Decide whether an incoming frame passes the device's rx filters.
 * Returns 1 to accept, 0 to drop.  Checks, in order: promiscuous mode,
 * the VLAN filter, then the unicast/multicast/broadcast MAC rules.
 */
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    const uint8_t *frame = buf + n->host_hdr_len;
    int i;

    if (n->promisc) {
        return 1;
    }

    /* 802.1Q tagged frame: consult the VLAN filter bitmap. */
    if (memcmp(&frame[12], vlan, sizeof(vlan)) == 0) {
        int vid = lduw_be_p(frame + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (!(frame[0] & 1)) {
        /* Unicast destination. */
        if (n->nouni) {
            return 0;
        }
        if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        }
        if (memcmp(frame, n->mac, ETH_ALEN) == 0) {
            return 1;
        }
        /* Unicast table entries occupy slots [0, first_multi). */
        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (memcmp(frame, &n->mac_table.macs[i * ETH_ALEN],
                       ETH_ALEN) == 0) {
                return 1;
            }
        }
        return 0;
    }

    /* Multicast (or broadcast) destination. */
    if (memcmp(frame, bcast, sizeof(bcast)) == 0) {
        return !n->nobcast;
    }
    if (n->nomulti) {
        return 0;
    }
    if (n->allmulti || n->mac_table.multi_overflow) {
        return 1;
    }
    /* Multicast table entries occupy slots [first_multi, in_use). */
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        if (memcmp(frame, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN) == 0) {
            return 1;
        }
    }

    return 0;
}
1771
virtio_net_get_hash_type(bool hasip4,bool hasip6,EthL4HdrProto l4hdr_proto,uint32_t types)1772 static uint8_t virtio_net_get_hash_type(bool hasip4,
1773 bool hasip6,
1774 EthL4HdrProto l4hdr_proto,
1775 uint32_t types)
1776 {
1777 if (hasip4) {
1778 switch (l4hdr_proto) {
1779 case ETH_L4_HDR_PROTO_TCP:
1780 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
1781 return NetPktRssIpV4Tcp;
1782 }
1783 break;
1784
1785 case ETH_L4_HDR_PROTO_UDP:
1786 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
1787 return NetPktRssIpV4Udp;
1788 }
1789 break;
1790
1791 default:
1792 break;
1793 }
1794
1795 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1796 return NetPktRssIpV4;
1797 }
1798 } else if (hasip6) {
1799 switch (l4hdr_proto) {
1800 case ETH_L4_HDR_PROTO_TCP:
1801 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
1802 return NetPktRssIpV6TcpEx;
1803 }
1804 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
1805 return NetPktRssIpV6Tcp;
1806 }
1807 break;
1808
1809 case ETH_L4_HDR_PROTO_UDP:
1810 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
1811 return NetPktRssIpV6UdpEx;
1812 }
1813 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
1814 return NetPktRssIpV6Udp;
1815 }
1816 break;
1817
1818 default:
1819 break;
1820 }
1821
1822 if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
1823 return NetPktRssIpV6Ex;
1824 }
1825 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
1826 return NetPktRssIpV6;
1827 }
1828 }
1829 return 0xff;
1830 }
1831
/* Stamp the RSS hash value and report type into the virtio_net_hdr_v1_hash
 * at the front of the packet buffer.
 * NOTE(review): the fields are stored in host byte order here — confirm
 * this matches what the guest expects on big-endian hosts. */
static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}
1839
/*
 * Software RSS: hash the incoming packet and decide which rx queue should
 * receive it.  Also stamps the hash into the packet header when hash
 * reporting (populate_hash) is enabled.
 *
 * Returns the target queue index if the packet should be redirected to a
 * different queue than the one it arrived on, or -1 to keep it where it is.
 */
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool hasip4, hasip6;
    EthL4HdrProto l4hdr_proto;
    /* Maps NetPktRss* hash algorithm -> VIRTIO_NET_HASH_REPORT_* value. */
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = size
    };

    /* Parse L3/L4 protocols past the host vnet header. */
    net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
    net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        /* No applicable hash type: report NONE and optionally fall back to
         * the configured default queue. */
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        /* indirections_len is a power of two, so masking selects a slot. */
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    /* -1 means "already on the right queue". */
    return (index == new_index) ? -1 : new_index;
}
1890
/*
 * Core receive path (caller holds the RCU read lock).  Copies one packet
 * into guest rx buffers, spanning multiple virtqueue elements when
 * mergeable rx buffers are negotiated.
 *
 * Returns: size on success or deliberate drop, 0 when the guest has no
 * buffers (packet should be queued and retried), -1 on fatal queue error.
 * no_rss suppresses RSS redirection to avoid recursing more than once.
 */
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
    size_t lens[VIRTQUEUE_MAX_SIZE];
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset, j;
    ssize_t err;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* Software RSS may steer the packet to a different subqueue; recurse
     * once with no_rss = true on the chosen queue. */
    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    /* Filtered out: report success so the packet is silently dropped. */
    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    /* Fill one virtqueue element per iteration until the whole packet
     * (offset == size) has been copied. */
    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            /* Running out of buffers mid-packet is a guest bug: we already
             * checked virtio_net_has_buffers() above. */
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /* Remember where num_buffers lives; it can only be written
                 * once the total element count is known (after the loop). */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            /* First element carries the virtio-net header. */
            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                /* Copy the rest of the (hash-bearing) host header too. */
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            err = size;
            goto err;
        }

        elems[i] = elem;
        lens[i] = total;
        i++;
    }

    if (mhdr_cnt) {
        /* Now that i is final, patch num_buffers into the first header. */
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    for (j = 0; j < i; j++) {
        /* signal other side */
        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
        g_free(elems[j]);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;

err:
    /* Return all partially-filled elements to the queue unused. */
    for (j = 0; j < i; j++) {
        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
        g_free(elems[j]);
    }

    return err;
}
2033
/* Entry point from the net layer: take the RCU read lock for the whole
 * receive path, then delegate to virtio_net_receive_rcu(). */
static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}
2041
/*
 * Locate the IPv4/TCP headers inside a buffered packet and record their
 * positions and lengths in *unit for RSC coalescing.
 */
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    /* IPv4 header follows the guest vnet header and the Ethernet header. */
    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    /* Low nibble of ip_ver_len is IHL in 32-bit words; <<2 gives bytes. */
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    /* Data offset is the top nibble in 32-bit words: >>12 for the nibble,
     * <<2 to convert words to bytes, combined as >>10. */
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    /* TCP payload = total IP length minus IP and TCP header lengths. */
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}
2058
virtio_net_rsc_extract_unit6(VirtioNetRscChain * chain,const uint8_t * buf,VirtioNetRscUnit * unit)2059 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2060 const uint8_t *buf,
2061 VirtioNetRscUnit *unit)
2062 {
2063 struct ip6_header *ip6;
2064
2065 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2066 + sizeof(struct eth_header));
2067 unit->ip = ip6;
2068 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2069 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2070 + sizeof(struct ip6_header));
2071 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2072
2073 /* There is a difference between payload length in ipv4 and v6,
2074 ip header is excluded in ipv6 */
2075 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
2076 }
2077
virtio_net_rsc_drain_seg(VirtioNetRscChain * chain,VirtioNetRscSeg * seg)2078 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2079 VirtioNetRscSeg *seg)
2080 {
2081 int ret;
2082 struct virtio_net_hdr_v1 *h;
2083
2084 h = (struct virtio_net_hdr_v1 *)seg->buf;
2085 h->flags = 0;
2086 h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2087
2088 if (seg->is_coalesced) {
2089 h->rsc.segments = seg->packets;
2090 h->rsc.dup_acks = seg->dup_ack;
2091 h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2092 if (chain->proto == ETH_P_IP) {
2093 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2094 } else {
2095 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2096 }
2097 }
2098
2099 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2100 QTAILQ_REMOVE(&chain->buffers, seg, next);
2101 g_free(seg->buf);
2102 g_free(seg);
2103
2104 return ret;
2105 }
2106
virtio_net_rsc_purge(void * opq)2107 static void virtio_net_rsc_purge(void *opq)
2108 {
2109 VirtioNetRscSeg *seg, *rn;
2110 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2111
2112 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2113 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2114 chain->stat.purge_failed++;
2115 continue;
2116 }
2117 }
2118
2119 chain->stat.timer++;
2120 if (!QTAILQ_EMPTY(&chain->buffers)) {
2121 timer_mod(chain->drain_timer,
2122 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2123 }
2124 }
2125
virtio_net_rsc_cleanup(VirtIONet * n)2126 static void virtio_net_rsc_cleanup(VirtIONet *n)
2127 {
2128 VirtioNetRscChain *chain, *rn_chain;
2129 VirtioNetRscSeg *seg, *rn_seg;
2130
2131 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2132 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2133 QTAILQ_REMOVE(&chain->buffers, seg, next);
2134 g_free(seg->buf);
2135 g_free(seg);
2136 }
2137
2138 timer_free(chain->drain_timer);
2139 QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2140 g_free(chain);
2141 }
2142 }
2143
virtio_net_rsc_cache_buf(VirtioNetRscChain * chain,NetClientState * nc,const uint8_t * buf,size_t size)2144 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2145 NetClientState *nc,
2146 const uint8_t *buf, size_t size)
2147 {
2148 uint16_t hdr_len;
2149 VirtioNetRscSeg *seg;
2150
2151 hdr_len = chain->n->guest_hdr_len;
2152 seg = g_new(VirtioNetRscSeg, 1);
2153 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2154 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2155 memcpy(seg->buf, buf, size);
2156 seg->size = size;
2157 seg->packets = 1;
2158 seg->dup_ack = 0;
2159 seg->is_coalesced = 0;
2160 seg->nc = nc;
2161
2162 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2163 chain->stat.cache++;
2164
2165 switch (chain->proto) {
2166 case ETH_P_IP:
2167 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2168 break;
2169 case ETH_P_IPV6:
2170 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2171 break;
2172 default:
2173 g_assert_not_reached();
2174 }
2175 }
2176
virtio_net_rsc_handle_ack(VirtioNetRscChain * chain,VirtioNetRscSeg * seg,const uint8_t * buf,struct tcp_header * n_tcp,struct tcp_header * o_tcp)2177 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2178 VirtioNetRscSeg *seg,
2179 const uint8_t *buf,
2180 struct tcp_header *n_tcp,
2181 struct tcp_header *o_tcp)
2182 {
2183 uint32_t nack, oack;
2184 uint16_t nwin, owin;
2185
2186 nack = htonl(n_tcp->th_ack);
2187 nwin = htons(n_tcp->th_win);
2188 oack = htonl(o_tcp->th_ack);
2189 owin = htons(o_tcp->th_win);
2190
2191 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2192 chain->stat.ack_out_of_win++;
2193 return RSC_FINAL;
2194 } else if (nack == oack) {
2195 /* duplicated ack or window probe */
2196 if (nwin == owin) {
2197 /* duplicated ack, add dup ack count due to whql test up to 1 */
2198 chain->stat.dup_ack++;
2199 return RSC_FINAL;
2200 } else {
2201 /* Coalesce window update */
2202 o_tcp->th_win = n_tcp->th_win;
2203 chain->stat.win_update++;
2204 return RSC_COALESCE;
2205 }
2206 } else {
2207 /* pure ack, go to 'C', finalize*/
2208 chain->stat.pure_ack++;
2209 return RSC_FINAL;
2210 }
2211 }
2212
virtio_net_rsc_coalesce_data(VirtioNetRscChain * chain,VirtioNetRscSeg * seg,const uint8_t * buf,VirtioNetRscUnit * n_unit)2213 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2214 VirtioNetRscSeg *seg,
2215 const uint8_t *buf,
2216 VirtioNetRscUnit *n_unit)
2217 {
2218 void *data;
2219 uint16_t o_ip_len;
2220 uint32_t nseq, oseq;
2221 VirtioNetRscUnit *o_unit;
2222
2223 o_unit = &seg->unit;
2224 o_ip_len = htons(*o_unit->ip_plen);
2225 nseq = htonl(n_unit->tcp->th_seq);
2226 oseq = htonl(o_unit->tcp->th_seq);
2227
2228 /* out of order or retransmitted. */
2229 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2230 chain->stat.data_out_of_win++;
2231 return RSC_FINAL;
2232 }
2233
2234 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2235 if (nseq == oseq) {
2236 if ((o_unit->payload == 0) && n_unit->payload) {
2237 /* From no payload to payload, normal case, not a dup ack or etc */
2238 chain->stat.data_after_pure_ack++;
2239 goto coalesce;
2240 } else {
2241 return virtio_net_rsc_handle_ack(chain, seg, buf,
2242 n_unit->tcp, o_unit->tcp);
2243 }
2244 } else if ((nseq - oseq) != o_unit->payload) {
2245 /* Not a consistent packet, out of order */
2246 chain->stat.data_out_of_order++;
2247 return RSC_FINAL;
2248 } else {
2249 coalesce:
2250 if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2251 chain->stat.over_size++;
2252 return RSC_FINAL;
2253 }
2254
2255 /* Here comes the right data, the payload length in v4/v6 is different,
2256 so use the field value to update and record the new data len */
2257 o_unit->payload += n_unit->payload; /* update new data len */
2258
2259 /* update field in ip header */
2260 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2261
2262 /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
2263 for windows guest, while this may change the behavior for linux
2264 guest (only if it uses RSC feature). */
2265 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2266
2267 o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2268 o_unit->tcp->th_win = n_unit->tcp->th_win;
2269
2270 memmove(seg->buf + seg->size, data, n_unit->payload);
2271 seg->size += n_unit->payload;
2272 seg->packets++;
2273 chain->stat.coalesced++;
2274 return RSC_COALESCE;
2275 }
2276 }
2277
virtio_net_rsc_coalesce4(VirtioNetRscChain * chain,VirtioNetRscSeg * seg,const uint8_t * buf,size_t size,VirtioNetRscUnit * unit)2278 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2279 VirtioNetRscSeg *seg,
2280 const uint8_t *buf, size_t size,
2281 VirtioNetRscUnit *unit)
2282 {
2283 struct ip_header *ip1, *ip2;
2284
2285 ip1 = (struct ip_header *)(unit->ip);
2286 ip2 = (struct ip_header *)(seg->unit.ip);
2287 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2288 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2289 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2290 chain->stat.no_match++;
2291 return RSC_NO_MATCH;
2292 }
2293
2294 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2295 }
2296
virtio_net_rsc_coalesce6(VirtioNetRscChain * chain,VirtioNetRscSeg * seg,const uint8_t * buf,size_t size,VirtioNetRscUnit * unit)2297 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2298 VirtioNetRscSeg *seg,
2299 const uint8_t *buf, size_t size,
2300 VirtioNetRscUnit *unit)
2301 {
2302 struct ip6_header *ip1, *ip2;
2303
2304 ip1 = (struct ip6_header *)(unit->ip);
2305 ip2 = (struct ip6_header *)(seg->unit.ip);
2306 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2307 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2308 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2309 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2310 chain->stat.no_match++;
2311 return RSC_NO_MATCH;
2312 }
2313
2314 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2315 }
2316
2317 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2318 * to prevent out of order */
virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain * chain,struct tcp_header * tcp)2319 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2320 struct tcp_header *tcp)
2321 {
2322 uint16_t tcp_hdr;
2323 uint16_t tcp_flag;
2324
2325 tcp_flag = htons(tcp->th_offset_flags);
2326 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2327 tcp_flag &= VIRTIO_NET_TCP_FLAG;
2328 if (tcp_flag & TH_SYN) {
2329 chain->stat.tcp_syn++;
2330 return RSC_BYPASS;
2331 }
2332
2333 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2334 chain->stat.tcp_ctrl_drain++;
2335 return RSC_FINAL;
2336 }
2337
2338 if (tcp_hdr > sizeof(struct tcp_header)) {
2339 chain->stat.tcp_all_opt++;
2340 return RSC_FINAL;
2341 }
2342
2343 return RSC_CANDIDATE;
2344 }
2345
virtio_net_rsc_do_coalesce(VirtioNetRscChain * chain,NetClientState * nc,const uint8_t * buf,size_t size,VirtioNetRscUnit * unit)2346 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2347 NetClientState *nc,
2348 const uint8_t *buf, size_t size,
2349 VirtioNetRscUnit *unit)
2350 {
2351 int ret;
2352 VirtioNetRscSeg *seg, *nseg;
2353
2354 if (QTAILQ_EMPTY(&chain->buffers)) {
2355 chain->stat.empty_cache++;
2356 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2357 timer_mod(chain->drain_timer,
2358 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2359 return size;
2360 }
2361
2362 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2363 if (chain->proto == ETH_P_IP) {
2364 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2365 } else {
2366 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2367 }
2368
2369 if (ret == RSC_FINAL) {
2370 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2371 /* Send failed */
2372 chain->stat.final_failed++;
2373 return 0;
2374 }
2375
2376 /* Send current packet */
2377 return virtio_net_do_receive(nc, buf, size);
2378 } else if (ret == RSC_NO_MATCH) {
2379 continue;
2380 } else {
2381 /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2382 seg->is_coalesced = 1;
2383 return size;
2384 }
2385 }
2386
2387 chain->stat.no_match_cache++;
2388 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2389 return size;
2390 }
2391
2392 /* Drain a connection data, this is to avoid out of order segments */
virtio_net_rsc_drain_flow(VirtioNetRscChain * chain,NetClientState * nc,const uint8_t * buf,size_t size,uint16_t ip_start,uint16_t ip_size,uint16_t tcp_port)2393 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2394 NetClientState *nc,
2395 const uint8_t *buf, size_t size,
2396 uint16_t ip_start, uint16_t ip_size,
2397 uint16_t tcp_port)
2398 {
2399 VirtioNetRscSeg *seg, *nseg;
2400 uint32_t ppair1, ppair2;
2401
2402 ppair1 = *(uint32_t *)(buf + tcp_port);
2403 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2404 ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2405 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2406 || (ppair1 != ppair2)) {
2407 continue;
2408 }
2409 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2410 chain->stat.drain_failed++;
2411 }
2412
2413 break;
2414 }
2415
2416 return virtio_net_do_receive(nc, buf, size);
2417 }
2418
virtio_net_rsc_sanity_check4(VirtioNetRscChain * chain,struct ip_header * ip,const uint8_t * buf,size_t size)2419 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2420 struct ip_header *ip,
2421 const uint8_t *buf, size_t size)
2422 {
2423 uint16_t ip_len;
2424
2425 /* Not an ipv4 packet */
2426 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2427 chain->stat.ip_option++;
2428 return RSC_BYPASS;
2429 }
2430
2431 /* Don't handle packets with ip option */
2432 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2433 chain->stat.ip_option++;
2434 return RSC_BYPASS;
2435 }
2436
2437 if (ip->ip_p != IPPROTO_TCP) {
2438 chain->stat.bypass_not_tcp++;
2439 return RSC_BYPASS;
2440 }
2441
2442 /* Don't handle packets with ip fragment */
2443 if (!(htons(ip->ip_off) & IP_DF)) {
2444 chain->stat.ip_frag++;
2445 return RSC_BYPASS;
2446 }
2447
2448 /* Don't handle packets with ecn flag */
2449 if (IPTOS_ECN(ip->ip_tos)) {
2450 chain->stat.ip_ecn++;
2451 return RSC_BYPASS;
2452 }
2453
2454 ip_len = htons(ip->ip_len);
2455 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2456 || ip_len > (size - chain->n->guest_hdr_len -
2457 sizeof(struct eth_header))) {
2458 chain->stat.ip_hacked++;
2459 return RSC_BYPASS;
2460 }
2461
2462 return RSC_CANDIDATE;
2463 }
2464
virtio_net_rsc_receive4(VirtioNetRscChain * chain,NetClientState * nc,const uint8_t * buf,size_t size)2465 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2466 NetClientState *nc,
2467 const uint8_t *buf, size_t size)
2468 {
2469 int32_t ret;
2470 uint16_t hdr_len;
2471 VirtioNetRscUnit unit;
2472
2473 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2474
2475 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2476 + sizeof(struct tcp_header))) {
2477 chain->stat.bypass_not_tcp++;
2478 return virtio_net_do_receive(nc, buf, size);
2479 }
2480
2481 virtio_net_rsc_extract_unit4(chain, buf, &unit);
2482 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2483 != RSC_CANDIDATE) {
2484 return virtio_net_do_receive(nc, buf, size);
2485 }
2486
2487 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2488 if (ret == RSC_BYPASS) {
2489 return virtio_net_do_receive(nc, buf, size);
2490 } else if (ret == RSC_FINAL) {
2491 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2492 ((hdr_len + sizeof(struct eth_header)) + 12),
2493 VIRTIO_NET_IP4_ADDR_SIZE,
2494 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2495 }
2496
2497 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2498 }
2499
virtio_net_rsc_sanity_check6(VirtioNetRscChain * chain,struct ip6_header * ip6,const uint8_t * buf,size_t size)2500 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2501 struct ip6_header *ip6,
2502 const uint8_t *buf, size_t size)
2503 {
2504 uint16_t ip_len;
2505
2506 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2507 != IP_HEADER_VERSION_6) {
2508 return RSC_BYPASS;
2509 }
2510
2511 /* Both option and protocol is checked in this */
2512 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2513 chain->stat.bypass_not_tcp++;
2514 return RSC_BYPASS;
2515 }
2516
2517 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2518 if (ip_len < sizeof(struct tcp_header) ||
2519 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2520 - sizeof(struct ip6_header))) {
2521 chain->stat.ip_hacked++;
2522 return RSC_BYPASS;
2523 }
2524
2525 /* Don't handle packets with ecn flag */
2526 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2527 chain->stat.ip_ecn++;
2528 return RSC_BYPASS;
2529 }
2530
2531 return RSC_CANDIDATE;
2532 }
2533
virtio_net_rsc_receive6(void * opq,NetClientState * nc,const uint8_t * buf,size_t size)2534 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2535 const uint8_t *buf, size_t size)
2536 {
2537 int32_t ret;
2538 uint16_t hdr_len;
2539 VirtioNetRscChain *chain;
2540 VirtioNetRscUnit unit;
2541
2542 chain = opq;
2543 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2544
2545 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2546 + sizeof(tcp_header))) {
2547 return virtio_net_do_receive(nc, buf, size);
2548 }
2549
2550 virtio_net_rsc_extract_unit6(chain, buf, &unit);
2551 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2552 unit.ip, buf, size)) {
2553 return virtio_net_do_receive(nc, buf, size);
2554 }
2555
2556 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2557 if (ret == RSC_BYPASS) {
2558 return virtio_net_do_receive(nc, buf, size);
2559 } else if (ret == RSC_FINAL) {
2560 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2561 ((hdr_len + sizeof(struct eth_header)) + 8),
2562 VIRTIO_NET_IP6_ADDR_SIZE,
2563 hdr_len + sizeof(struct eth_header)
2564 + sizeof(struct ip6_header));
2565 }
2566
2567 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2568 }
2569
virtio_net_rsc_lookup_chain(VirtIONet * n,NetClientState * nc,uint16_t proto)2570 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2571 NetClientState *nc,
2572 uint16_t proto)
2573 {
2574 VirtioNetRscChain *chain;
2575
2576 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2577 return NULL;
2578 }
2579
2580 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2581 if (chain->proto == proto) {
2582 return chain;
2583 }
2584 }
2585
2586 chain = g_malloc(sizeof(*chain));
2587 chain->n = n;
2588 chain->proto = proto;
2589 if (proto == (uint16_t)ETH_P_IP) {
2590 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2591 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2592 } else {
2593 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2594 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2595 }
2596 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2597 virtio_net_rsc_purge, chain);
2598 memset(&chain->stat, 0, sizeof(chain->stat));
2599
2600 QTAILQ_INIT(&chain->buffers);
2601 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2602
2603 return chain;
2604 }
2605
virtio_net_rsc_receive(NetClientState * nc,const uint8_t * buf,size_t size)2606 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2607 const uint8_t *buf,
2608 size_t size)
2609 {
2610 uint16_t proto;
2611 VirtioNetRscChain *chain;
2612 struct eth_header *eth;
2613 VirtIONet *n;
2614
2615 n = qemu_get_nic_opaque(nc);
2616 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2617 return virtio_net_do_receive(nc, buf, size);
2618 }
2619
2620 eth = (struct eth_header *)(buf + n->guest_hdr_len);
2621 proto = htons(eth->h_proto);
2622
2623 chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2624 if (chain) {
2625 chain->stat.received++;
2626 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2627 return virtio_net_rsc_receive4(chain, nc, buf, size);
2628 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2629 return virtio_net_rsc_receive6(chain, nc, buf, size);
2630 }
2631 }
2632 return virtio_net_do_receive(nc, buf, size);
2633 }
2634
virtio_net_receive(NetClientState * nc,const uint8_t * buf,size_t size)2635 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2636 size_t size)
2637 {
2638 VirtIONet *n = qemu_get_nic_opaque(nc);
2639 if ((n->rsc4_enabled || n->rsc6_enabled)) {
2640 return virtio_net_rsc_receive(nc, buf, size);
2641 } else {
2642 return virtio_net_do_receive(nc, buf, size);
2643 }
2644 }
2645
2646 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2647
virtio_net_tx_complete(NetClientState * nc,ssize_t len)2648 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2649 {
2650 VirtIONet *n = qemu_get_nic_opaque(nc);
2651 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2652 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2653 int ret;
2654
2655 virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2656 virtio_notify(vdev, q->tx_vq);
2657
2658 g_free(q->async_tx.elem);
2659 q->async_tx.elem = NULL;
2660
2661 virtio_queue_set_notification(q->tx_vq, 1);
2662 ret = virtio_net_flush_tx(q);
2663 if (ret >= n->tx_burst) {
2664 /*
2665 * the flush has been stopped by tx_burst
2666 * we will not receive notification for the
2667 * remainining part, so re-schedule
2668 */
2669 virtio_queue_set_notification(q->tx_vq, 0);
2670 if (q->tx_bh) {
2671 qemu_bh_schedule(q->tx_bh);
2672 } else {
2673 timer_mod(q->tx_timer,
2674 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2675 }
2676 q->tx_waiting = 1;
2677 }
2678 }
2679
2680 /* TX */
virtio_net_flush_tx(VirtIONetQueue * q)2681 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2682 {
2683 VirtIONet *n = q->n;
2684 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2685 VirtQueueElement *elem;
2686 int32_t num_packets = 0;
2687 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2688 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2689 return num_packets;
2690 }
2691
2692 if (q->async_tx.elem) {
2693 virtio_queue_set_notification(q->tx_vq, 0);
2694 return num_packets;
2695 }
2696
2697 for (;;) {
2698 ssize_t ret;
2699 unsigned int out_num;
2700 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2701 struct virtio_net_hdr_v1_hash vhdr;
2702
2703 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2704 if (!elem) {
2705 break;
2706 }
2707
2708 out_num = elem->out_num;
2709 out_sg = elem->out_sg;
2710 if (out_num < 1) {
2711 virtio_error(vdev, "virtio-net header not in first element");
2712 virtqueue_detach_element(q->tx_vq, elem, 0);
2713 g_free(elem);
2714 return -EINVAL;
2715 }
2716
2717 if (n->has_vnet_hdr) {
2718 if (iov_to_buf(out_sg, out_num, 0, &vhdr, n->guest_hdr_len) <
2719 n->guest_hdr_len) {
2720 virtio_error(vdev, "virtio-net header incorrect");
2721 virtqueue_detach_element(q->tx_vq, elem, 0);
2722 g_free(elem);
2723 return -EINVAL;
2724 }
2725 if (n->needs_vnet_hdr_swap) {
2726 virtio_net_hdr_swap(vdev, (void *) &vhdr);
2727 sg2[0].iov_base = &vhdr;
2728 sg2[0].iov_len = n->guest_hdr_len;
2729 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2730 out_sg, out_num,
2731 n->guest_hdr_len, -1);
2732 if (out_num == VIRTQUEUE_MAX_SIZE) {
2733 goto drop;
2734 }
2735 out_num += 1;
2736 out_sg = sg2;
2737 }
2738 }
2739 /*
2740 * If host wants to see the guest header as is, we can
2741 * pass it on unchanged. Otherwise, copy just the parts
2742 * that host is interested in.
2743 */
2744 assert(n->host_hdr_len <= n->guest_hdr_len);
2745 if (n->host_hdr_len != n->guest_hdr_len) {
2746 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2747 out_sg, out_num,
2748 0, n->host_hdr_len);
2749 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2750 out_sg, out_num,
2751 n->guest_hdr_len, -1);
2752 out_num = sg_num;
2753 out_sg = sg;
2754 }
2755
2756 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2757 out_sg, out_num, virtio_net_tx_complete);
2758 if (ret == 0) {
2759 virtio_queue_set_notification(q->tx_vq, 0);
2760 q->async_tx.elem = elem;
2761 return -EBUSY;
2762 }
2763
2764 drop:
2765 virtqueue_push(q->tx_vq, elem, 0);
2766 virtio_notify(vdev, q->tx_vq);
2767 g_free(elem);
2768
2769 if (++num_packets >= n->tx_burst) {
2770 break;
2771 }
2772 }
2773 return num_packets;
2774 }
2775
2776 static void virtio_net_tx_timer(void *opaque);
2777
virtio_net_handle_tx_timer(VirtIODevice * vdev,VirtQueue * vq)2778 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2779 {
2780 VirtIONet *n = VIRTIO_NET(vdev);
2781 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2782
2783 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2784 virtio_net_drop_tx_queue_data(vdev, vq);
2785 return;
2786 }
2787
2788 /* This happens when device was stopped but VCPU wasn't. */
2789 if (!vdev->vm_running) {
2790 q->tx_waiting = 1;
2791 return;
2792 }
2793
2794 if (q->tx_waiting) {
2795 /* We already have queued packets, immediately flush */
2796 timer_del(q->tx_timer);
2797 virtio_net_tx_timer(q);
2798 } else {
2799 /* re-arm timer to flush it (and more) on next tick */
2800 timer_mod(q->tx_timer,
2801 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2802 q->tx_waiting = 1;
2803 virtio_queue_set_notification(vq, 0);
2804 }
2805 }
2806
virtio_net_handle_tx_bh(VirtIODevice * vdev,VirtQueue * vq)2807 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2808 {
2809 VirtIONet *n = VIRTIO_NET(vdev);
2810 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2811
2812 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2813 virtio_net_drop_tx_queue_data(vdev, vq);
2814 return;
2815 }
2816
2817 if (unlikely(q->tx_waiting)) {
2818 return;
2819 }
2820 q->tx_waiting = 1;
2821 /* This happens when device was stopped but VCPU wasn't. */
2822 if (!vdev->vm_running) {
2823 return;
2824 }
2825 virtio_queue_set_notification(vq, 0);
2826 qemu_bh_schedule(q->tx_bh);
2827 }
2828
virtio_net_tx_timer(void * opaque)2829 static void virtio_net_tx_timer(void *opaque)
2830 {
2831 VirtIONetQueue *q = opaque;
2832 VirtIONet *n = q->n;
2833 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2834 int ret;
2835
2836 /* This happens when device was stopped but BH wasn't. */
2837 if (!vdev->vm_running) {
2838 /* Make sure tx waiting is set, so we'll run when restarted. */
2839 assert(q->tx_waiting);
2840 return;
2841 }
2842
2843 q->tx_waiting = 0;
2844
2845 /* Just in case the driver is not ready on more */
2846 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2847 return;
2848 }
2849
2850 ret = virtio_net_flush_tx(q);
2851 if (ret == -EBUSY || ret == -EINVAL) {
2852 return;
2853 }
2854 /*
2855 * If we flush a full burst of packets, assume there are
2856 * more coming and immediately rearm
2857 */
2858 if (ret >= n->tx_burst) {
2859 q->tx_waiting = 1;
2860 timer_mod(q->tx_timer,
2861 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2862 return;
2863 }
2864 /*
2865 * If less than a full burst, re-enable notification and flush
2866 * anything that may have come in while we weren't looking. If
2867 * we find something, assume the guest is still active and rearm
2868 */
2869 virtio_queue_set_notification(q->tx_vq, 1);
2870 ret = virtio_net_flush_tx(q);
2871 if (ret > 0) {
2872 virtio_queue_set_notification(q->tx_vq, 0);
2873 q->tx_waiting = 1;
2874 timer_mod(q->tx_timer,
2875 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2876 }
2877 }
2878
virtio_net_tx_bh(void * opaque)2879 static void virtio_net_tx_bh(void *opaque)
2880 {
2881 VirtIONetQueue *q = opaque;
2882 VirtIONet *n = q->n;
2883 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2884 int32_t ret;
2885
2886 /* This happens when device was stopped but BH wasn't. */
2887 if (!vdev->vm_running) {
2888 /* Make sure tx waiting is set, so we'll run when restarted. */
2889 assert(q->tx_waiting);
2890 return;
2891 }
2892
2893 q->tx_waiting = 0;
2894
2895 /* Just in case the driver is not ready on more */
2896 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2897 return;
2898 }
2899
2900 ret = virtio_net_flush_tx(q);
2901 if (ret == -EBUSY || ret == -EINVAL) {
2902 return; /* Notification re-enable handled by tx_complete or device
2903 * broken */
2904 }
2905
2906 /* If we flush a full burst of packets, assume there are
2907 * more coming and immediately reschedule */
2908 if (ret >= n->tx_burst) {
2909 qemu_bh_schedule(q->tx_bh);
2910 q->tx_waiting = 1;
2911 return;
2912 }
2913
2914 /* If less than a full burst, re-enable notification and flush
2915 * anything that may have come in while we weren't looking. If
2916 * we find something, assume the guest is still active and reschedule */
2917 virtio_queue_set_notification(q->tx_vq, 1);
2918 ret = virtio_net_flush_tx(q);
2919 if (ret == -EINVAL) {
2920 return;
2921 } else if (ret > 0) {
2922 virtio_queue_set_notification(q->tx_vq, 0);
2923 qemu_bh_schedule(q->tx_bh);
2924 q->tx_waiting = 1;
2925 }
2926 }
2927
virtio_net_add_queue(VirtIONet * n,int index)2928 static void virtio_net_add_queue(VirtIONet *n, int index)
2929 {
2930 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2931
2932 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2933 virtio_net_handle_rx);
2934
2935 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2936 n->vqs[index].tx_vq =
2937 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2938 virtio_net_handle_tx_timer);
2939 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2940 virtio_net_tx_timer,
2941 &n->vqs[index]);
2942 } else {
2943 n->vqs[index].tx_vq =
2944 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2945 virtio_net_handle_tx_bh);
2946 n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
2947 &DEVICE(vdev)->mem_reentrancy_guard);
2948 }
2949
2950 n->vqs[index].tx_waiting = 0;
2951 n->vqs[index].n = n;
2952 }
2953
virtio_net_del_queue(VirtIONet * n,int index)2954 static void virtio_net_del_queue(VirtIONet *n, int index)
2955 {
2956 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2957 VirtIONetQueue *q = &n->vqs[index];
2958 NetClientState *nc = qemu_get_subqueue(n->nic, index);
2959
2960 qemu_purge_queued_packets(nc);
2961
2962 virtio_del_queue(vdev, index * 2);
2963 if (q->tx_timer) {
2964 timer_free(q->tx_timer);
2965 q->tx_timer = NULL;
2966 } else {
2967 qemu_bh_delete(q->tx_bh);
2968 q->tx_bh = NULL;
2969 }
2970 q->tx_waiting = 0;
2971 virtio_del_queue(vdev, index * 2 + 1);
2972 }
2973
virtio_net_change_num_queue_pairs(VirtIONet * n,int new_max_queue_pairs)2974 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2975 {
2976 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2977 int old_num_queues = virtio_get_num_queues(vdev);
2978 int new_num_queues = new_max_queue_pairs * 2 + 1;
2979 int i;
2980
2981 assert(old_num_queues >= 3);
2982 assert(old_num_queues % 2 == 1);
2983
2984 if (old_num_queues == new_num_queues) {
2985 return;
2986 }
2987
2988 /*
2989 * We always need to remove and add ctrl vq if
2990 * old_num_queues != new_num_queues. Remove ctrl_vq first,
2991 * and then we only enter one of the following two loops.
2992 */
2993 virtio_del_queue(vdev, old_num_queues - 1);
2994
2995 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2996 /* new_num_queues < old_num_queues */
2997 virtio_net_del_queue(n, i / 2);
2998 }
2999
3000 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
3001 /* new_num_queues > old_num_queues */
3002 virtio_net_add_queue(n, i / 2);
3003 }
3004
3005 /* add ctrl_vq last */
3006 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3007 }
3008
virtio_net_set_multiqueue(VirtIONet * n,int multiqueue)3009 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
3010 {
3011 int max = multiqueue ? n->max_queue_pairs : 1;
3012
3013 n->multiqueue = multiqueue;
3014 virtio_net_change_num_queue_pairs(n, max);
3015
3016 virtio_net_set_queue_pairs(n);
3017 }
3018
/*
 * Device-level post_load hook (vmstate_virtio_net_device.post_load):
 * re-derives runtime state that is not carried in the migration stream,
 * validates fields against local limits, and restarts in-flight guest
 * announcements and RSS steering.  Returns 0 (never fails).
 */
static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    /* Re-derive the vnet header layout from the negotiated features. */
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queue_pairs(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        /* Multicast MACs have the low bit of the first octet set. */
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queue_pairs; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    /* Resume a guest self-announcement that was interrupted mid-flight. */
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    if (n->rss_data.enabled) {
        /* Hash reporting requires software RSS; otherwise prefer eBPF. */
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (!n->rss_data.populate_hash) {
            if (!virtio_net_attach_epbf_rss(n)) {
                if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                    /* vhost cannot fall back to software RSS in QEMU. */
                    warn_report("Can't post-load eBPF RSS for vhost");
                } else {
                    warn_report("Can't post-load eBPF RSS - "
                                "fallback to software RSS");
                    n->rss_data.enabled_software_rss = true;
                }
            }
        }

        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        trace_virtio_net_rss_disable();
    }
    return 0;
}
3101
/*
 * Transport-level post_load hook, invoked after virtio_load has
 * completed.  Restores the offload state saved earlier by
 * virtio_net_post_load_device and pushes it down to the peer.
 */
static int virtio_net_post_load_virtio(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);

    /*
     * virtio_load's virtio_set_features_nocheck call clobbered
     * curr_guest_offloads; the value stashed in saved_guest_offloads
     * is the one we actually want.
     */
    n->curr_guest_offloads = n->saved_guest_offloads;

    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    return 0;
}
3117
3118 /* tx_waiting field of a VirtIONetQueue */
/* Migration format for the tx_waiting field of a single VirtIONetQueue. */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
3126
max_queue_pairs_gt_1(void * opaque,int version_id)3127 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3128 {
3129 return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3130 }
3131
has_ctrl_guest_offloads(void * opaque,int version_id)3132 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3133 {
3134 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3135 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3136 }
3137
mac_table_fits(void * opaque,int version_id)3138 static bool mac_table_fits(void *opaque, int version_id)
3139 {
3140 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3141 }
3142
mac_table_doesnt_fit(void * opaque,int version_id)3143 static bool mac_table_doesnt_fit(void *opaque, int version_id)
3144 {
3145 return !mac_table_fits(opaque, version_id);
3146 }
3147
3148 /* This temporary type is shared by all the WITH_TMP methods
3149 * although only some fields are used by each.
3150 */
struct VirtIONetMigTmp {
    VirtIONet *parent;              /* device the tmp state belongs to */
    VirtIONetQueue *vqs_1;          /* &parent->vqs[1]: queues after the 1st */
    uint16_t curr_queue_pairs_1;    /* number of queue pairs beyond the 1st */
    uint8_t has_ufo;                /* saved parent->has_ufo flag */
    uint32_t has_vnet_hdr;          /* saved parent->has_vnet_hdr flag */
};
3158
3159 /* The 2nd and subsequent tx_waiting flags are loaded later than
3160 * the 1st entry in the queue_pairs and only if there's more than one
3161 * entry. We use the tmp mechanism to calculate a temporary
3162 * pointer and count and also validate the count.
3163 */
3164
virtio_net_tx_waiting_pre_save(void * opaque)3165 static int virtio_net_tx_waiting_pre_save(void *opaque)
3166 {
3167 struct VirtIONetMigTmp *tmp = opaque;
3168
3169 tmp->vqs_1 = tmp->parent->vqs + 1;
3170 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3171 if (tmp->parent->curr_queue_pairs == 0) {
3172 tmp->curr_queue_pairs_1 = 0;
3173 }
3174
3175 return 0;
3176 }
3177
virtio_net_tx_waiting_pre_load(void * opaque)3178 static int virtio_net_tx_waiting_pre_load(void *opaque)
3179 {
3180 struct VirtIONetMigTmp *tmp = opaque;
3181
3182 /* Reuse the pointer setup from save */
3183 virtio_net_tx_waiting_pre_save(opaque);
3184
3185 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3186 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3187 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3188
3189 return -EINVAL;
3190 }
3191
3192 return 0; /* all good */
3193 }
3194
/*
 * Migration format for tx_waiting of queue pairs 1..N-1, using the
 * vqs_1/curr_queue_pairs_1 pair set up by the pre_save/pre_load hooks.
 */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name = "virtio-net-tx_waiting",
    .pre_load = virtio_net_tx_waiting_pre_load,
    .pre_save = virtio_net_tx_waiting_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                             curr_queue_pairs_1,
                                             vmstate_virtio_net_queue_tx_waiting,
                                             struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
3207
3208 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3209 * flag set we need to check that we have it
3210 */
virtio_net_ufo_post_load(void * opaque,int version_id)3211 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3212 {
3213 struct VirtIONetMigTmp *tmp = opaque;
3214
3215 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3216 error_report("virtio-net: saved image requires TUN_F_UFO support");
3217 return -EINVAL;
3218 }
3219
3220 return 0;
3221 }
3222
virtio_net_ufo_pre_save(void * opaque)3223 static int virtio_net_ufo_pre_save(void *opaque)
3224 {
3225 struct VirtIONetMigTmp *tmp = opaque;
3226
3227 tmp->has_ufo = tmp->parent->has_ufo;
3228
3229 return 0;
3230 }
3231
/* Migration wrapper for the has_ufo capability flag (checked on load). */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save = virtio_net_ufo_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3241
3242 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3243 * flag set we need to check that we have it
3244 */
virtio_net_vnet_post_load(void * opaque,int version_id)3245 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3246 {
3247 struct VirtIONetMigTmp *tmp = opaque;
3248
3249 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3250 error_report("virtio-net: saved image requires vnet_hdr=on");
3251 return -EINVAL;
3252 }
3253
3254 return 0;
3255 }
3256
virtio_net_vnet_pre_save(void * opaque)3257 static int virtio_net_vnet_pre_save(void *opaque)
3258 {
3259 struct VirtIONetMigTmp *tmp = opaque;
3260
3261 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3262
3263 return 0;
3264 }
3265
/* Migration wrapper for the has_vnet_hdr capability flag (checked on load). */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save = virtio_net_vnet_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3275
virtio_net_rss_needed(void * opaque)3276 static bool virtio_net_rss_needed(void *opaque)
3277 {
3278 return VIRTIO_NET(opaque)->rss_data.enabled;
3279 }
3280
/*
 * Optional migration subsection carrying the RSS configuration set by
 * the guest via VIRTIO_NET_CTRL_MQ_RSS_CONFIG; only sent when enabled.
 */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        /* Indirection table is variable-length; allocated on load. */
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
3301
/*
 * Device-private migration state, nested inside vmstate_virtio_net via
 * VirtioDeviceClass::vmsd.  Field order is migration ABI: do not reorder.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of queue pair 0; the remainder migrate further below. */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         * sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* max_queue_pairs must be identical on both ends of the migration. */
        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3357
/* NetClientState callbacks wiring the NIC backend to this frontend. */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3367
/*
 * Ask the vhost backend whether guest notifier @idx has a pending
 * notification.  @idx may also be VIRTIO_CONFIG_IRQ_IDX (-1) to query
 * the config-change interrupt.  Only valid while vhost is running.
 */
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
        /* Must guard against invalid features and bogus queue index
         * from being set by malicious guest, or penetrated through
         * buggy migration stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return false;
        }
        /* Without MQ, vq 2 is the ctrl vq, handled by the last subqueue. */
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    /*
     * VIRTIO_CONFIG_IRQ_IDX (-1) identifies the configure interrupt
     * rather than a virtqueue; route it to the config-pending query.
     * Drivers without config-interrupt support return false here.
     */

    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
        return vhost_net_config_pending(get_vhost_net(nc->peer));
    }
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}
3398
/*
 * Mask or unmask guest notifier @idx in the vhost backend.  @idx may
 * also be VIRTIO_CONFIG_IRQ_IDX (-1) to mask the config-change
 * interrupt.  Only valid while vhost is running.
 */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
        /* Must guard against invalid features and bogus queue index
         * from being set by malicious guest, or penetrated through
         * buggy migration stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return;
        }
        /* Without MQ, vq 2 is the ctrl vq, handled by the last subqueue. */
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    /*
     * VIRTIO_CONFIG_IRQ_IDX (-1) identifies the configure interrupt
     * rather than a virtqueue; route it to the config-mask operation.
     * Drivers without config-interrupt support return early here.
     */

    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
        vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
        return;
    }
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
}
3431
/*
 * Compute the size of the virtio config space implied by
 * @host_features and store it in n->config_size.
 */
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    /* The MAC config field is always exposed, whatever was negotiated. */
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
}
3438
/*
 * Record the netclient name/type to use when the NIC is created in
 * realize().  @name may be NULL, in which case the netclient will be
 * named "type.x"; @type must be non-NULL.  Any previously stored
 * strings are released first, so this may be called repeatedly.
 */
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    assert(type != NULL);

    g_free(n->netclient_name);
    n->netclient_name = g_strdup(name);

    g_free(n->netclient_type);
    n->netclient_type = g_strdup(type);
}
3452
failover_unplug_primary(VirtIONet * n,DeviceState * dev)3453 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3454 {
3455 HotplugHandler *hotplug_ctrl;
3456 PCIDevice *pci_dev;
3457 Error *err = NULL;
3458
3459 hotplug_ctrl = qdev_get_hotplug_handler(dev);
3460 if (hotplug_ctrl) {
3461 pci_dev = PCI_DEVICE(dev);
3462 pci_dev->partially_hotplugged = true;
3463 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3464 if (err) {
3465 error_report_err(err);
3466 return false;
3467 }
3468 } else {
3469 return false;
3470 }
3471 return true;
3472 }
3473
/*
 * Re-plug a failover primary that was partially hot-unplugged for a
 * migration that subsequently failed.  Returns true on success or when
 * nothing needed to be done; returns false and sets @errp on failure.
 */
static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
                                    Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(dev);
    BusState *primary_bus;

    if (!pdev->partially_hotplugged) {
        /* We never unplugged it; nothing to redo. */
        return true;
    }
    primary_bus = dev->parent_bus;
    if (!primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    /* Reattach to the bus before making it guest-visible again. */
    qdev_set_parent_bus(dev, primary_bus, &error_abort);
    qatomic_set(&n->failover_primary_hidden, false);
    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, dev, &err);
    }
    pdev->partially_hotplugged = false;

out:
    error_propagate(errp, err);
    return !err;
}
3506
/*
 * React to migration state changes for the failover primary: unplug it
 * from the guest when migration starts, and plug it back in when the
 * migration fails.
 */
static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
{
    DeviceState *dev = failover_find_primary_device(n);
    Error *err = NULL;
    bool should_be_hidden;

    if (!dev) {
        return;
    }

    should_be_hidden = qatomic_read(&n->failover_primary_hidden);

    if (migration_in_setup(s) && !should_be_hidden) {
        /* Migration is starting: take the primary away from the guest. */
        if (!failover_unplug_primary(n, dev)) {
            warn_report("couldn't unplug primary device");
            return;
        }
        vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
        qapi_event_send_unplug_primary(dev->id);
        qatomic_set(&n->failover_primary_hidden, true);
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device; plug it back in. */
        if (!failover_replug_primary(n, dev, &err) && err) {
            error_report_err(err);
        }
    }
}
3536
/* Migration notifier callback: forwards state changes to the failover logic. */
static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
{
    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);

    virtio_net_handle_migration_primary(n, data);
}
3543
/*
 * DeviceListener::hide_device callback: decide whether a device being
 * created should be hidden from the guest because it is the failover
 * primary paired with this virtio-net instance.  Returns true to hide.
 * Also records the primary's qdict options so it can be re-created on
 * unhide (see failover_add_primary in the rest of this file).
 */
static bool failover_hide_primary_device(DeviceListener *listener,
                                         const QDict *device_opts,
                                         bool from_json,
                                         Error **errp)
{
    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
    const char *standby_id;

    if (!device_opts) {
        return false;
    }

    /* Only devices declaring a failover pair are candidates. */
    if (!qdict_haskey(device_opts, "failover_pair_id")) {
        return false;
    }

    if (!qdict_haskey(device_opts, "id")) {
        error_setg(errp, "Device with failover_pair_id needs to have id");
        return false;
    }

    /* Ignore primaries paired with some other standby device. */
    standby_id = qdict_get_str(device_opts, "failover_pair_id");
    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
        return false;
    }

    /*
     * The hide helper can be called several times for a given device.
     * Check there is only one primary for a virtio-net device but
     * don't duplicate the qdict several times if it's called for the same
     * device.
     */
    if (n->primary_opts) {
        const char *old, *new;
        /* devices with failover_pair_id always have an id */
        old = qdict_get_str(n->primary_opts, "id");
        new = qdict_get_str(device_opts, "id");
        if (strcmp(old, new) != 0) {
            error_setg(errp, "Cannot attach more than one primary device to "
                       "'%s': '%s' and '%s'", n->netclient_name, old, new);
            return false;
        }
    } else {
        n->primary_opts = qdict_clone_shallow(device_opts);
        n->primary_opts_from_json = from_json;
    }

    /* failover_primary_hidden is set during feature negotiation */
    return qatomic_read(&n->failover_primary_hidden);
}
3594
/*
 * realize() implementation: validates the user-supplied configuration
 * (duplex/speed, queue sizes, queue-pair count), initializes the
 * VirtIODevice core, creates the data and control virtqueues plus the
 * NIC backend, and sets up failover and eBPF RSS when requested.
 * On error, reports through @errp and undoes virtio_init().
 */
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    /* A user-specified MTU implies offering VIRTIO_NET_F_MTU. */
    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    /* SPEED_UNKNOWN is -1; anything below it is an invalid speed. */
    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    if (n->failover) {
        /* Hide the primary until VIRTIO_NET_F_STANDBY is negotiated. */
        n->primary_listener.hide_device = failover_hide_primary_device;
        qatomic_set(&n->failover_primary_hidden, true);
        device_listener_register(&n->primary_listener);
        migration_add_notifier(&n->migration_state,
                               virtio_net_migration_state_notifier);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   virtio_net_max_tx_queue_size(n));
        virtio_cleanup(vdev);
        return;
    }

    n->max_ncs = MAX(n->nic_conf.peers.queues, 1);

    /*
     * Figure out the datapath queue pairs since the backend could
     * provide control queue via peers as well.
     */
    if (n->nic_conf.peers.queues) {
        for (i = 0; i < n->max_ncs; i++) {
            if (n->nic_conf.peers.ncs[i]->is_datapath) {
                ++n->max_queue_pairs;
            }
        }
    }
    n->max_queue_pairs = MAX(n->max_queue_pairs, 1);

    /* Each pair needs 2 vqs; +1 for the ctrl vq; all must fit the transport. */
    if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
    n->curr_queue_pairs = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queue_pairs; i++) {
        virtio_net_add_queue(n, i);
    }

    /* The ctrl vq must come after all data queues. */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * Happen when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name,
                              &dev->mem_reentrancy_guard, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id,
                              &dev->mem_reentrancy_guard, n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        n->nic->ncs[i].do_not_pad = true;
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queue_pairs; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    /* vhost-vdpa needs the MAC pushed into the backend config space. */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        struct virtio_net_config netcfg = {};
        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
        vhost_net_set_config(get_vhost_net(nc->peer),
            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt);

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_load_ebpf(n);
    }
}
3773
/*
 * unrealize() implementation: tears down everything created by
 * virtio_net_device_realize(), stopping the backend first so nothing
 * writes to guest memory or the queues while they are destroyed.
 */
static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queue_pairs;

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_unload_ebpf(n);
    }

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (n->failover) {
        qobject_unref(n->primary_opts);
        device_listener_unregister(&n->primary_listener);
        migration_remove_notifier(&n->migration_state);
    } else {
        /* primary_opts is only ever populated on the failover path. */
        assert(n->primary_opts == NULL);
    }

    /* Queue layout: max_queue_pairs RX/TX pairs followed by the ctrl vq. */
    max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    for (i = 0; i < max_queue_pairs; i++) {
        virtio_net_del_queue(n, i);
    }
    /* delete also control vq */
    virtio_del_queue(vdev, max_queue_pairs * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}
3817
/*
 * QOM instance_init: set defaults that must exist before properties are
 * applied and before realize() runs.  No guest-visible setup here.
 */
static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n));

    ebpf_rss_init(&n->ebpf_rss);
}
3833
virtio_net_pre_save(void * opaque)3834 static int virtio_net_pre_save(void *opaque)
3835 {
3836 VirtIONet *n = opaque;
3837
3838 /* At this point, backend must be stopped, otherwise
3839 * it might keep writing to memory. */
3840 assert(!n->vhost_started);
3841
3842 return 0;
3843 }
3844
primary_unplug_pending(void * opaque)3845 static bool primary_unplug_pending(void *opaque)
3846 {
3847 DeviceState *dev = opaque;
3848 DeviceState *primary;
3849 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3850 VirtIONet *n = VIRTIO_NET(vdev);
3851
3852 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3853 return false;
3854 }
3855 primary = failover_find_primary_device(n);
3856 return primary ? primary->pending_deleted_event : false;
3857 }
3858
dev_unplug_pending(void * opaque)3859 static bool dev_unplug_pending(void *opaque)
3860 {
3861 DeviceState *dev = opaque;
3862 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3863
3864 return vdc->primary_unplug_pending(dev);
3865 }
3866
/*
 * Return the vhost_dev backing this device's first queue, or NULL when
 * there is no vhost backend (e.g. a user-mode or socket net backend,
 * for which get_vhost_net() returns NULL).
 *
 * Fix: the previous version unconditionally returned &net->dev, which
 * dereferences a NULL vhost_net pointer when the peer is not a vhost
 * backend; callers must be handed NULL instead.
 */
static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);
    struct vhost_net *net = get_vhost_net(nc->peer);

    return net ? &net->dev : NULL;
}
3874
/*
 * Top-level migration descriptor for the device; the device-specific
 * payload lives in vmstate_virtio_net_device (via VMSTATE_VIRTIO_DEVICE).
 */
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};
3886
/* User-configurable properties; most map directly to virtio feature bits. */
static Property virtio_net_properties[] = {
    /* Checksum and segmentation offload features. */
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    /* Control virtqueue and its sub-commands. */
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    /* Multiqueue, RSS and receive coalescing (default off). */
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                      VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                      VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                      VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    /* Backend/NIC configuration knobs (not feature bits). */
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    /* USO (UDP segmentation offload) features. */
    DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_USO4, true),
    DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_USO6, true),
    DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_USO, true),
    DEFINE_PROP_END_OF_LIST(),
};
3959
/*
 * Class initializer for TYPE_VIRTIO_NET: wires the qdev properties and
 * migration state into DeviceClass, and fills in the VirtioDeviceClass
 * callback table with the virtio-net implementations.
 */
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *device_class = DEVICE_CLASS(klass);
    VirtioDeviceClass *virtio_class = VIRTIO_DEVICE_CLASS(klass);

    /* Generic qdev plumbing: properties, migration stream, category. */
    device_class_set_props(device_class, virtio_net_properties);
    device_class->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, device_class->categories);

    /* Device lifecycle. */
    virtio_class->realize = virtio_net_device_realize;
    virtio_class->unrealize = virtio_net_device_unrealize;
    virtio_class->reset = virtio_net_reset;

    /* Config space access and feature negotiation. */
    virtio_class->get_config = virtio_net_get_config;
    virtio_class->set_config = virtio_net_set_config;
    virtio_class->get_features = virtio_net_get_features;
    virtio_class->set_features = virtio_net_set_features;
    virtio_class->bad_features = virtio_net_bad_features;
    /* VIRTIO_NET_F_GSO is only offered on the legacy (pre-1.0) interface. */
    virtio_class->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);

    /* Queue state and device status handling. */
    virtio_class->queue_reset = virtio_net_queue_reset;
    virtio_class->queue_enable = virtio_net_queue_enable;
    virtio_class->set_status = virtio_net_set_status;

    /* Guest notifier mask/pending hooks. */
    virtio_class->guest_notifier_mask = virtio_net_guest_notifier_mask;
    virtio_class->guest_notifier_pending = virtio_net_guest_notifier_pending;

    /* Migration of the virtio-device-level state. */
    virtio_class->post_load = virtio_net_post_load_virtio;
    virtio_class->vmsd = &vmstate_virtio_net_device;

    /* Failover and vhost integration. */
    virtio_class->primary_unplug_pending = primary_unplug_pending;
    virtio_class->get_vhost = virtio_net_get_vhost;
    virtio_class->toggle_device_iotlb = vhost_toggle_device_iotlb;
}
3988
3989 static const TypeInfo virtio_net_info = {
3990 .name = TYPE_VIRTIO_NET,
3991 .parent = TYPE_VIRTIO_DEVICE,
3992 .instance_size = sizeof(VirtIONet),
3993 .instance_init = virtio_net_instance_init,
3994 .class_init = virtio_net_class_init,
3995 };
3996
virtio_register_types(void)3997 static void virtio_register_types(void)
3998 {
3999 type_register_static(&virtio_net_info);
4000 }
4001
/* Arrange for virtio_register_types() to run at module-init time. */
type_init(virtio_register_types)
4003