/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qobject/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "system/system.h"
#include "system/replay.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "monitor/monitor.h"
#include "hw/pci/pci_device.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "system/qtest.h"

#define VIRTIO_NET_VM_VERSION 11

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Purge coalesced packets timer interval.  This value affects performance
 * a lot and should be tuned carefully: '300000' (300us) is the recommended
 * value to pass the WHQL test, while '50000' can gain 2x netperf throughput
 * with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 |  \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 |  \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
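/*
 * Each entry below extends the guest-visible config space up to (and
 * including) the named field when the corresponding feature bit is
 * negotiated; roughly, the device exposes only as much of struct
 * virtio_net_config as the acked features justify.  For example, without
 * VIRTIO_NET_F_MQ the config ends before max_virtqueue_pairs.
 */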
static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}
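/*
 * Virtqueues are laid out as rx0, tx0, rx1, tx1, ..., with the control
 * queue (if any) last; so pair N owns queue indices 2N (RX) and 2N + 1
 * (TX), e.g. queue index 5 is the TX queue of pair 2.
 */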
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        return;
    }

    qemu_flush_or_purge_queued_packets(nc->peer, true);
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 n->rss_data.supported_hash_types);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret == -1) {
            return;
        }

        /*
         * Some NIC/kernel combinations present 0 as the mac address.  As that
         * is not a legal address, try to proceed with the address from the
         * QEMU command line in the hope that the address has been configured
         * correctly elsewhere - just not reported by the device.
         */
        if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
            info_report("Zero hardware mac address detected. Ignoring.");
            memcpy(netcfg.mac, n->mac, ETH_ALEN);
        }

        netcfg.status |= virtio_tswap16(vdev,
                                        n->status & VIRTIO_NET_S_ANNOUNCE);
        memcpy(config, &netcfg, n->config_size);
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}
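/*
 * n->max_ncs counts all net client states (the queue pairs plus, when
 * VIRTIO_NET_F_CTRL_VQ is offered, one extra slot), so the subtraction
 * below yields the number of control virtqueues to hand to vhost.
 */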
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}
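/*
 * vnet header fields cross the QEMU/backend boundary in a fixed byte
 * order; tell the backend which order the device uses so it can parse
 * them without QEMU having to swap each header.
 */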
static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = n->has_vnet_hdr &&
                                 virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static int virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                replay_bh_schedule_event(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the
                 * tx queue and notification disabled. */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
    return 0;
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}
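/*
 * VLAN filtering uses a MAX_VLAN-bit bitmap stored as 32-bit words:
 * vid N lives at bit (N & 0x1f) of word (N >> 5), so (i << 5) + j below
 * recovers the vid from word i, bit j.
 */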
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}

static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int r;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
        if (r < 0) {
            error_report("unable to restart vhost net virtqueue: %d, "
                         "when resetting the queue", queue_index);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static int peer_has_uso(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    return qemu_has_uso(qemu_get_queue(n->nic)->peer);
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
        n->rss_data.populate_hash = false;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];
    struct vhost_net *net;

    if (!peer) {
        goto default_value;
    }

    net = get_vhost_net(peer);

    if (!net || !net->max_tx_queue_size) {
        goto default_value;
    }

    return net->max_tx_queue_size;

default_value:
    return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);
    struct vhost_net *net;

    if (!nc->peer) {
        return 0;
    }

    net = get_vhost_net(nc->peer);
    if (net && net->is_vhost_user) {
        vhost_net_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);
    struct vhost_net *net;

    if (!nc->peer) {
        return 0;
    }

    net = get_vhost_net(nc->peer);
    if (net && net->is_vhost_user) {
        vhost_net_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
        (1ULL << VIRTIO_NET_F_GUEST_USO4) |
        (1ULL << VIRTIO_NET_F_GUEST_USO6);

    return guest_offloads_mask & features;
}

uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @dev: the device currently being walked
 * @opaque: the FailoverDevice to fill in
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
        !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);

        /*
         * Keep acked_features in NetVhostUserState up-to-date so it
         * doesn't miss any features configured by the guest virtio driver.
         */
        vhost_net_save_acked_features(nc->peer);
    }

    if (virtio_has_feature(vdev->guest_features ^ features, VIRTIO_NET_F_CTRL_VLAN)) {
        bool vlan = virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN);
        memset(n->vlans, vlan ? 0 : 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
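/*
 * A VIRTIO_NET_CTRL_MAC_TABLE_SET payload is two back-to-back
 * virtio_net_ctrl_mac blocks, each a 32-bit entry count followed by that
 * many 6-byte MACs: unicast addresses first, then multicast.  A table
 * that overflows MAC_TABLE_ENTRIES is not stored; the overflow flag
 * makes the filter accept that whole class of addresses instead.
 */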
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    trace_virtio_net_rss_attach_ebpf(nic, prog_fd);
    return nc->info->set_steering_ebpf(nc, prog_fd);
}

static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->runtime_hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_ebpf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key,
                          NULL)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_ebpf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

static void virtio_net_commit_rss_config(VirtIONet *n)
{
    if (n->rss_data.peer_hash_available) {
        return;
    }

    if (n->rss_data.enabled) {
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (n->rss_data.populate_hash) {
            virtio_net_detach_ebpf_rss(n);
        } else if (!virtio_net_attach_ebpf_rss(n)) {
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
            } else {
                warn_report("Can't load eBPF RSS - fallback to software RSS");
                n->rss_data.enabled_software_rss = true;
            }
        }

        trace_virtio_net_rss_enable(n,
                                    n->rss_data.runtime_hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        virtio_net_detach_ebpf_rss(n);
        trace_virtio_net_rss_disable(n);
    }
}

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (!n->rss_data.enabled) {
        return;
    }

    n->rss_data.enabled = false;
    virtio_net_commit_rss_config(n);
}

static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
{
    int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
    int ret = true;
    int i = 0;

    if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
        error_setg(errp, "Expected %d file descriptors but got %d",
                   EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
        return false;
    }

    for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
        fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp);
        if (fds[i] < 0) {
            ret = false;
            goto exit;
        }
    }

    ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], errp);

exit:
    if (!ret) {
        for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
            close(fds[i]);
        }
    }

    return ret;
}

static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        return true;
    }

    trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds);

    /*
     * If user explicitly gave QEMU RSS FDs to use, then
     * failing to use them must be considered a fatal
     * error. If no RSS FDs were provided, QEMU is trying
     * eBPF on a "best effort" basis only, so report a
     * warning and allow fallback to software RSS.
     */
    if (n->ebpf_rss_fds) {
        return virtio_net_load_ebpf_fds(n, errp);
    }

    ebpf_rss_load(&n->ebpf_rss, &error_warn);
    return true;
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}
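/*
 * Parses the payload of VIRTIO_NET_CTRL_MQ_RSS_CONFIG (do_rss = true) or
 * VIRTIO_NET_CTRL_MQ_HASH_CONFIG (do_rss = false).  The wire layout is
 * struct virtio_net_rss_config: hash_types, indirection_table_mask,
 * unclassified_queue, a variable-length indirection_table[], then
 * max_tx_vq, hash_key_length and the key bytes; the mask, table and
 * unclassified queue are ignored for plain hash reporting.  Returns the
 * number of queue pairs on success, 0 on error.
 */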
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.runtime_hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    if (!do_rss) {
        n->rss_data.indirections_len = 0;
    }
    if (n->rss_data.indirections_len >= VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.indirections_len++;
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.runtime_hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.runtime_hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;
    virtio_net_commit_rss_config(n);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(n, err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}

size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}
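/*
 * Returns 1 when the RX queue can hold a packet of 'bufsize' bytes and
 * leaves guest notification disabled; returns 0 otherwise.  Notification
 * is re-enabled together with a re-check of the available space, so a
 * buffer the guest adds between the size check and the enable is not
 * missed.
 */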
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    int opaque;
    unsigned int in_bytes;
    VirtIONet *n = q->n;

    while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
        opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
                                           bufsize, 0);
        /* Enough buffer space, disable notification */
        if (bufsize <= in_bytes) {
            break;
        }

        if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
            /* Guest has added some buffers, try again */
            continue;
        } else {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);

    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    size_t csum_size = ETH_HLEN + sizeof(struct ip_header) +
                       sizeof(struct udp_header);

    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size >= csum_size && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}
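/*
 * Returns 1 to accept the packet, 0 to drop it.  Checks run cheapest
 * first: promiscuous mode short-circuits everything, then the VLAN
 * bitmap is consulted, then the multicast/unicast mode flags and,
 * last, a linear scan of the relevant half of the MAC table.
 */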
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { /* multicast */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}
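/*
 * Maps the parsed packet protocols onto one of the NetPktRss* hash
 * functions, honouring only the hash types the guest enabled; returns
 * 0xff when no negotiated hash type matches, in which case the packet
 * is not hashed.
 */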
static uint8_t virtio_net_get_hash_type(bool hasip4,
                                        bool hasip6,
                                        EthL4HdrProto l4hdr_proto,
                                        uint32_t types)
{
    if (hasip4) {
        switch (l4hdr_proto) {
        case ETH_L4_HDR_PROTO_TCP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
                return NetPktRssIpV4Tcp;
            }
            break;

        case ETH_L4_HDR_PROTO_UDP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
                return NetPktRssIpV4Udp;
            }
            break;

        default:
            break;
        }

        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (hasip6) {
        switch (l4hdr_proto) {
        case ETH_L4_HDR_PROTO_TCP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
                return NetPktRssIpV6TcpEx;
            }
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
                return NetPktRssIpV6Tcp;
            }
            break;

        case ETH_L4_HDR_PROTO_UDP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
                return NetPktRssIpV6UdpEx;
            }
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
                return NetPktRssIpV6Udp;
            }
            break;

        default:
            break;
        }

        if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
            return NetPktRssIpV6Ex;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
            return NetPktRssIpV6;
        }
    }
    return 0xff;
}
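/*
 * Computes the RSS hash for one incoming packet.  Fills in the hash
 * report fields of *hdr when hash population is enabled, and returns
 * the queue index the packet should be steered to, or -1 when it
 * should stay on the queue it arrived on.
 */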
virtio_net_process_rss(NetClientState * nc,const uint8_t * buf,size_t size,struct virtio_net_hdr_v1_hash * hdr)1790 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1791 size_t size,
1792 struct virtio_net_hdr_v1_hash *hdr)
1793 {
1794 VirtIONet *n = qemu_get_nic_opaque(nc);
1795 unsigned int index = nc->queue_index, new_index = index;
1796 struct NetRxPkt *pkt = n->rx_pkt;
1797 uint8_t net_hash_type;
1798 uint32_t hash;
1799 bool hasip4, hasip6;
1800 EthL4HdrProto l4hdr_proto;
1801 static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1802 VIRTIO_NET_HASH_REPORT_IPv4,
1803 VIRTIO_NET_HASH_REPORT_TCPv4,
1804 VIRTIO_NET_HASH_REPORT_TCPv6,
1805 VIRTIO_NET_HASH_REPORT_IPv6,
1806 VIRTIO_NET_HASH_REPORT_IPv6_EX,
1807 VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1808 VIRTIO_NET_HASH_REPORT_UDPv4,
1809 VIRTIO_NET_HASH_REPORT_UDPv6,
1810 VIRTIO_NET_HASH_REPORT_UDPv6_EX
1811 };
1812 struct iovec iov = {
1813 .iov_base = (void *)buf,
1814 .iov_len = size
1815 };
1816
1817 net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
1818 net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
1819 net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
1820 n->rss_data.runtime_hash_types);
1821 if (net_hash_type > NetPktRssIpV6UdpEx) {
1822 if (n->rss_data.populate_hash) {
1823 hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE;
1824 hdr->hash_report = 0;
1825 }
1826 return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1827 }
1828
1829 hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1830
1831 if (n->rss_data.populate_hash) {
1832 hdr->hash_value = hash;
1833 hdr->hash_report = reports[net_hash_type];
1834 }
1835
1836 if (n->rss_data.redirect) {
1837 new_index = hash & (n->rss_data.indirections_len - 1);
1838 new_index = n->rss_data.indirections_table[new_index];
1839 }
1840
1841 return (index == new_index) ? -1 : new_index;
1842 }
1843
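/*
 * Receive path proper: optionally re-target the packet to another queue via
 * RSS, then copy it into as many guest rx buffers as needed (a single buffer
 * unless mergeable rx buffers were negotiated), filling and flushing the
 * virtqueue only once the whole packet has been consumed.
 */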
1844 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1845 size_t size)
1846 {
1847 VirtIONet *n = qemu_get_nic_opaque(nc);
1848 VirtIONetQueue *q;
1849 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1850 QEMU_UNINITIALIZED VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1851 QEMU_UNINITIALIZED size_t lens[VIRTQUEUE_MAX_SIZE];
1852 QEMU_UNINITIALIZED struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1853 struct virtio_net_hdr_v1_hash extra_hdr;
1854 unsigned mhdr_cnt = 0;
1855 size_t offset, i, guest_offset, j;
1856 ssize_t err;
1857
1858 memset(&extra_hdr, 0, sizeof(extra_hdr));
1859
1860 if (n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1861 int index = virtio_net_process_rss(nc, buf, size, &extra_hdr);
1862 if (index >= 0) {
1863 nc = qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
1864 }
1865 }
1866
1867 if (!virtio_net_can_receive(nc)) {
1868 return -1;
1869 }
1870
1871 q = virtio_net_get_subqueue(nc);
1872
1873 /* hdr_len refers to the header we supply to the guest */
1874 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1875 return 0;
1876 }
1877
1878 if (!receive_filter(n, buf, size))
1879 return size;
1880
1881 offset = i = 0;
1882
1883 while (offset < size) {
1884 VirtQueueElement *elem;
1885 int len, total;
1886 const struct iovec *sg;
1887
1888 total = 0;
1889
1890 if (i == VIRTQUEUE_MAX_SIZE) {
1891 virtio_error(vdev, "virtio-net unexpected long buffer chain");
1892 err = size;
1893 goto err;
1894 }
1895
1896 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1897 if (!elem) {
1898 if (i) {
1899 virtio_error(vdev, "virtio-net unexpected empty queue: "
1900 "i %zd mergeable %d offset %zd, size %zd, "
1901 "guest hdr len %zd, host hdr len %zd "
1902 "guest features 0x%" PRIx64,
1903 i, n->mergeable_rx_bufs, offset, size,
1904 n->guest_hdr_len, n->host_hdr_len,
1905 vdev->guest_features);
1906 }
1907 err = -1;
1908 goto err;
1909 }
1910
1911 if (elem->in_num < 1) {
1912 virtio_error(vdev,
1913 "virtio-net receive queue contains no in buffers");
1914 virtqueue_detach_element(q->rx_vq, elem, 0);
1915 g_free(elem);
1916 err = -1;
1917 goto err;
1918 }
1919
1920 sg = elem->in_sg;
1921 if (i == 0) {
1922 assert(offset == 0);
1923 if (n->mergeable_rx_bufs) {
1924 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1925 sg, elem->in_num,
1926 offsetof(typeof(extra_hdr), hdr.num_buffers),
1927 sizeof(extra_hdr.hdr.num_buffers));
1928 } else {
1929 extra_hdr.hdr.num_buffers = cpu_to_le16(1);
1930 }
1931
1932 receive_header(n, sg, elem->in_num, buf, size);
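            /* Note: 'offset' is briefly reused as the offset of the hash
             * fields inside the header; it is reset to host_hdr_len below */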
1933 if (n->rss_data.populate_hash) {
1934 offset = offsetof(typeof(extra_hdr), hash_value);
1935 iov_from_buf(sg, elem->in_num, offset,
1936 (char *)&extra_hdr + offset,
1937 sizeof(extra_hdr.hash_value) +
1938 sizeof(extra_hdr.hash_report));
1939 }
1940 offset = n->host_hdr_len;
1941 total += n->guest_hdr_len;
1942 guest_offset = n->guest_hdr_len;
1943 } else {
1944 guest_offset = 0;
1945 }
1946
1947 /* copy in packet. ugh */
1948 len = iov_from_buf(sg, elem->in_num, guest_offset,
1949 buf + offset, size - offset);
1950 total += len;
1951 offset += len;
1952 /* If buffers can't be merged, at this point we
1953 * must have consumed the complete packet.
1954 * Otherwise, drop it. */
1955 if (!n->mergeable_rx_bufs && offset < size) {
1956 virtqueue_unpop(q->rx_vq, elem, total);
1957 g_free(elem);
1958 err = size;
1959 goto err;
1960 }
1961
1962 elems[i] = elem;
1963 lens[i] = total;
1964 i++;
1965 }
1966
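    /*
     * For mergeable rx buffers the final num_buffers count is only known
     * now; patch it into the header location captured in mhdr_sg above.
     */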
1967 if (mhdr_cnt) {
1968 virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i);
1969 iov_from_buf(mhdr_sg, mhdr_cnt,
1970 0,
1971 &extra_hdr.hdr.num_buffers,
1972 sizeof extra_hdr.hdr.num_buffers);
1973 }
1974
1975 for (j = 0; j < i; j++) {
1976 /* signal other side */
1977 virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1978 g_free(elems[j]);
1979 }
1980
1981 virtqueue_flush(q->rx_vq, i);
1982 virtio_notify(vdev, q->rx_vq);
1983
1984 return size;
1985
1986 err:
1987 for (j = 0; j < i; j++) {
1988 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
1989 g_free(elems[j]);
1990 }
1991
1992 return err;
1993 }
1994
1995 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1996 size_t size)
1997 {
1998 RCU_READ_LOCK_GUARD();
1999
2000 return virtio_net_receive_rcu(nc, buf, size);
2001 }
2002
2003 /*
2004 * Accessors to read and write the IP packet data length field. This
2005 * is a potentially unaligned network-byte-order 16 bit unsigned integer
2006 * pointed to by unit->ip_len.
2007 */
2008 static uint16_t read_unit_ip_len(VirtioNetRscUnit *unit)
2009 {
2010 return lduw_be_p(unit->ip_plen);
2011 }
2012
2013 static void write_unit_ip_len(VirtioNetRscUnit *unit, uint16_t l)
2014 {
2015 stw_be_p(unit->ip_plen, l);
2016 }
2017
2018 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
2019 const uint8_t *buf,
2020 VirtioNetRscUnit *unit)
2021 {
2022 uint16_t ip_hdrlen;
2023 struct ip_header *ip;
2024
2025 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
2026 + sizeof(struct eth_header));
2027 unit->ip = (void *)ip;
2028 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
2029 unit->ip_plen = &ip->ip_len;
2030 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
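    /*
     * The TCP data offset lives in the top 4 bits of th_offset_flags and is
     * measured in 32-bit words: (x & 0xF000) >> 12 gives words, << 2 gives
     * bytes, hence the combined >> 10. E.g. 0x8018 (offset 8, ACK|PSH)
     * yields 0x8000 >> 10 = 32 bytes.
     */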
2031 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2032 unit->payload = read_unit_ip_len(unit) - ip_hdrlen - unit->tcp_hdrlen;
2033 }
2034
2035 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2036 const uint8_t *buf,
2037 VirtioNetRscUnit *unit)
2038 {
2039 struct ip6_header *ip6;
2040
2041 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2042 + sizeof(struct eth_header));
2043 unit->ip = ip6;
2044 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2045 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2046 + sizeof(struct ip6_header));
2047 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2048
2049     /* Unlike IPv4's total length, the payload length field in IPv6
2050        excludes the IP header, so only the TCP header is subtracted */
2051 unit->payload = read_unit_ip_len(unit) - unit->tcp_hdrlen;
2052 }
2053
2054 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2055 VirtioNetRscSeg *seg)
2056 {
2057 int ret;
2058 struct virtio_net_hdr_v1 *h;
2059
2060 h = (struct virtio_net_hdr_v1 *)seg->buf;
2061 h->flags = 0;
2062 h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2063
2064 if (seg->is_coalesced) {
2065 h->rsc.segments = seg->packets;
2066 h->rsc.dup_acks = seg->dup_ack;
2067 h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2068 if (chain->proto == ETH_P_IP) {
2069 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2070 } else {
2071 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2072 }
2073 }
2074
2075 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2076 QTAILQ_REMOVE(&chain->buffers, seg, next);
2077 g_free(seg->buf);
2078 g_free(seg);
2079
2080 return ret;
2081 }
2082
2083 static void virtio_net_rsc_purge(void *opq)
2084 {
2085 VirtioNetRscSeg *seg, *rn;
2086 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2087
2088 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2089 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2090 chain->stat.purge_failed++;
2091 continue;
2092 }
2093 }
2094
2095 chain->stat.timer++;
2096 if (!QTAILQ_EMPTY(&chain->buffers)) {
2097 timer_mod(chain->drain_timer,
2098 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2099 }
2100 }
2101
2102 static void virtio_net_rsc_cleanup(VirtIONet *n)
2103 {
2104 VirtioNetRscChain *chain, *rn_chain;
2105 VirtioNetRscSeg *seg, *rn_seg;
2106
2107 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2108 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2109 QTAILQ_REMOVE(&chain->buffers, seg, next);
2110 g_free(seg->buf);
2111 g_free(seg);
2112 }
2113
2114 timer_free(chain->drain_timer);
2115 QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2116 g_free(chain);
2117 }
2118 }
2119
2120 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2121 NetClientState *nc,
2122 const uint8_t *buf, size_t size)
2123 {
2124 uint16_t hdr_len;
2125 VirtioNetRscSeg *seg;
2126
2127 hdr_len = chain->n->guest_hdr_len;
2128 seg = g_new(VirtioNetRscSeg, 1);
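    /* Allocate the largest frame we may ever coalesce into this segment up
     * front, so later payloads can be appended in place */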
2129 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2130 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2131 memcpy(seg->buf, buf, size);
2132 seg->size = size;
2133 seg->packets = 1;
2134 seg->dup_ack = 0;
2135 seg->is_coalesced = 0;
2136 seg->nc = nc;
2137
2138 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2139 chain->stat.cache++;
2140
2141 switch (chain->proto) {
2142 case ETH_P_IP:
2143 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2144 break;
2145 case ETH_P_IPV6:
2146 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2147 break;
2148 default:
2149 g_assert_not_reached();
2150 }
2151 }
2152
2153 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2154 VirtioNetRscSeg *seg,
2155 const uint8_t *buf,
2156 struct tcp_header *n_tcp,
2157 struct tcp_header *o_tcp)
2158 {
2159 uint32_t nack, oack;
2160 uint16_t nwin, owin;
2161
2162 nack = htonl(n_tcp->th_ack);
2163 nwin = htons(n_tcp->th_win);
2164 oack = htonl(o_tcp->th_ack);
2165 owin = htons(o_tcp->th_win);
2166
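    /* Unsigned subtraction gives the forward distance modulo 2^32, so this
     * single compare also handles ack sequence number wraparound */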
2167 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2168 chain->stat.ack_out_of_win++;
2169 return RSC_FINAL;
2170 } else if (nack == oack) {
2171 /* duplicated ack or window probe */
2172 if (nwin == owin) {
2173             /* duplicate ack: count it (capped at 1) to satisfy the WHQL test */
2174 chain->stat.dup_ack++;
2175 return RSC_FINAL;
2176 } else {
2177 /* Coalesce window update */
2178 o_tcp->th_win = n_tcp->th_win;
2179 chain->stat.win_update++;
2180 return RSC_COALESCE;
2181 }
2182 } else {
2183             /* pure ack, finalize */
2184 chain->stat.pure_ack++;
2185 return RSC_FINAL;
2186 }
2187 }
2188
2189 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2190 VirtioNetRscSeg *seg,
2191 const uint8_t *buf,
2192 VirtioNetRscUnit *n_unit)
2193 {
2194 void *data;
2195 uint16_t o_ip_len;
2196 uint32_t nseq, oseq;
2197 VirtioNetRscUnit *o_unit;
2198
2199 o_unit = &seg->unit;
2200 o_ip_len = read_unit_ip_len(o_unit);
2201 nseq = htonl(n_unit->tcp->th_seq);
2202 oseq = htonl(o_unit->tcp->th_seq);
2203
2204 /* out of order or retransmitted. */
2205 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2206 chain->stat.data_out_of_win++;
2207 return RSC_FINAL;
2208 }
2209
2210 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2211 if (nseq == oseq) {
2212 if ((o_unit->payload == 0) && n_unit->payload) {
2213 /* From no payload to payload, normal case, not a dup ack or etc */
2214 chain->stat.data_after_pure_ack++;
2215 goto coalesce;
2216 } else {
2217 return virtio_net_rsc_handle_ack(chain, seg, buf,
2218 n_unit->tcp, o_unit->tcp);
2219 }
2220 } else if ((nseq - oseq) != o_unit->payload) {
2221 /* Not a consistent packet, out of order */
2222 chain->stat.data_out_of_order++;
2223 return RSC_FINAL;
2224 } else {
2225 coalesce:
2226 if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2227 chain->stat.over_size++;
2228 return RSC_FINAL;
2229 }
2230
2231         /* The expected in-order data has arrived; the payload length field
2232            differs between v4/v6, so update it via the accessor and record it */
2233 o_unit->payload += n_unit->payload; /* update new data len */
2234
2235 /* update field in ip header */
2236 write_unit_ip_len(o_unit, o_ip_len + n_unit->payload);
2237
2238         /* Carry over the newest 'PUSH' flag: the WHQL test guide says 'PUSH'
2239            can be coalesced for Windows guests, though this may change the
2240            behavior for Linux guests (only if they use the RSC feature). */
2241 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2242
2243 o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2244 o_unit->tcp->th_win = n_unit->tcp->th_win;
2245
2246 memmove(seg->buf + seg->size, data, n_unit->payload);
2247 seg->size += n_unit->payload;
2248 seg->packets++;
2249 chain->stat.coalesced++;
2250 return RSC_COALESCE;
2251 }
2252 }
2253
2254 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2255 VirtioNetRscSeg *seg,
2256 const uint8_t *buf, size_t size,
2257 VirtioNetRscUnit *unit)
2258 {
2259 struct ip_header *ip1, *ip2;
2260
2261 ip1 = (struct ip_header *)(unit->ip);
2262 ip2 = (struct ip_header *)(seg->unit.ip);
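    /* XOR of each 4-tuple field is nonzero iff the field differs; a cheap
     * way to test the flow's addresses and ports for equality */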
2263 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2264 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2265 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2266 chain->stat.no_match++;
2267 return RSC_NO_MATCH;
2268 }
2269
2270 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2271 }
2272
2273 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2274 VirtioNetRscSeg *seg,
2275 const uint8_t *buf, size_t size,
2276 VirtioNetRscUnit *unit)
2277 {
2278 struct ip6_header *ip1, *ip2;
2279
2280 ip1 = (struct ip6_header *)(unit->ip);
2281 ip2 = (struct ip6_header *)(seg->unit.ip);
2282 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2283 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2284 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2285 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2286 chain->stat.no_match++;
2287 return RSC_NO_MATCH;
2288 }
2289
2290 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2291 }
2292
2293 /* Packets with 'SYN' set bypass coalescing; packets with other control
2294  * flags are sent only after the chain is drained, to prevent reordering */
2295 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2296 struct tcp_header *tcp)
2297 {
2298 uint16_t tcp_hdr;
2299 uint16_t tcp_flag;
2300
2301 tcp_flag = htons(tcp->th_offset_flags);
2302 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2303 tcp_flag &= VIRTIO_NET_TCP_FLAG;
2304 if (tcp_flag & TH_SYN) {
2305 chain->stat.tcp_syn++;
2306 return RSC_BYPASS;
2307 }
2308
2309 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2310 chain->stat.tcp_ctrl_drain++;
2311 return RSC_FINAL;
2312 }
2313
2314 if (tcp_hdr > sizeof(struct tcp_header)) {
2315 chain->stat.tcp_all_opt++;
2316 return RSC_FINAL;
2317 }
2318
2319 return RSC_CANDIDATE;
2320 }
2321
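/*
 * Try to coalesce the packet into an existing segment of this chain. The
 * packet is either cached (empty chain, or no matching flow), merged into a
 * matching segment, or, on RSC_FINAL, delivered right after the segment it
 * matched is drained.
 */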
2322 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2323 NetClientState *nc,
2324 const uint8_t *buf, size_t size,
2325 VirtioNetRscUnit *unit)
2326 {
2327 int ret;
2328 VirtioNetRscSeg *seg, *nseg;
2329
2330 if (QTAILQ_EMPTY(&chain->buffers)) {
2331 chain->stat.empty_cache++;
2332 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2333 timer_mod(chain->drain_timer,
2334 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2335 return size;
2336 }
2337
2338 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2339 if (chain->proto == ETH_P_IP) {
2340 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2341 } else {
2342 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2343 }
2344
2345 if (ret == RSC_FINAL) {
2346 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2347 /* Send failed */
2348 chain->stat.final_failed++;
2349 return 0;
2350 }
2351
2352 /* Send current packet */
2353 return virtio_net_do_receive(nc, buf, size);
2354 } else if (ret == RSC_NO_MATCH) {
2355 continue;
2356 } else {
2357             /* Coalesced; set the flag so the IPv4 checksum is recalculated */
2358 seg->is_coalesced = 1;
2359 return size;
2360 }
2361 }
2362
2363 chain->stat.no_match_cache++;
2364 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2365 return size;
2366 }
2367
2368 /* Drain a connection's buffered data to avoid out-of-order segments */
2369 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2370 NetClientState *nc,
2371 const uint8_t *buf, size_t size,
2372 uint16_t ip_start, uint16_t ip_size,
2373 uint16_t tcp_port)
2374 {
2375 VirtioNetRscSeg *seg, *nseg;
2376 uint32_t ppair1, ppair2;
2377
2378 ppair1 = *(uint32_t *)(buf + tcp_port);
2379 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2380 ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2381 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2382 || (ppair1 != ppair2)) {
2383 continue;
2384 }
2385 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2386 chain->stat.drain_failed++;
2387 }
2388
2389 break;
2390 }
2391
2392 return virtio_net_do_receive(nc, buf, size);
2393 }
2394
2395 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2396 struct ip_header *ip,
2397 const uint8_t *buf, size_t size)
2398 {
2399 uint16_t ip_len;
2400
2401 /* Not an ipv4 packet */
2402 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2403 chain->stat.ip_option++;
2404 return RSC_BYPASS;
2405 }
2406
2407 /* Don't handle packets with ip option */
2408 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2409 chain->stat.ip_option++;
2410 return RSC_BYPASS;
2411 }
2412
2413 if (ip->ip_p != IPPROTO_TCP) {
2414 chain->stat.bypass_not_tcp++;
2415 return RSC_BYPASS;
2416 }
2417
2418 /* Don't handle packets with ip fragment */
2419 if (!(htons(ip->ip_off) & IP_DF)) {
2420 chain->stat.ip_frag++;
2421 return RSC_BYPASS;
2422 }
2423
2424 /* Don't handle packets with ecn flag */
2425 if (IPTOS_ECN(ip->ip_tos)) {
2426 chain->stat.ip_ecn++;
2427 return RSC_BYPASS;
2428 }
2429
2430 ip_len = htons(ip->ip_len);
2431 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2432 || ip_len > (size - chain->n->guest_hdr_len -
2433 sizeof(struct eth_header))) {
2434 chain->stat.ip_hacked++;
2435 return RSC_BYPASS;
2436 }
2437
2438 return RSC_CANDIDATE;
2439 }
2440
2441 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2442 NetClientState *nc,
2443 const uint8_t *buf, size_t size)
2444 {
2445 int32_t ret;
2446 uint16_t hdr_len;
2447 VirtioNetRscUnit unit;
2448
2449 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2450
2451 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2452 + sizeof(struct tcp_header))) {
2453 chain->stat.bypass_not_tcp++;
2454 return virtio_net_do_receive(nc, buf, size);
2455 }
2456
2457 virtio_net_rsc_extract_unit4(chain, buf, &unit);
2458 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2459 != RSC_CANDIDATE) {
2460 return virtio_net_do_receive(nc, buf, size);
2461 }
2462
2463 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2464 if (ret == RSC_BYPASS) {
2465 return virtio_net_do_receive(nc, buf, size);
2466 } else if (ret == RSC_FINAL) {
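        /* 12 is the offset of ip_src within the IPv4 header, so the 8-byte
         * compare in drain_flow covers saddr + daddr; the port pair sits
         * right after the 20-byte fixed header */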
2467 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2468 ((hdr_len + sizeof(struct eth_header)) + 12),
2469 VIRTIO_NET_IP4_ADDR_SIZE,
2470 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2471 }
2472
2473 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2474 }
2475
2476 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2477 struct ip6_header *ip6,
2478 const uint8_t *buf, size_t size)
2479 {
2480 uint16_t ip_len;
2481
2482 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2483 != IP_HEADER_VERSION_6) {
2484 return RSC_BYPASS;
2485 }
2486
2487     /* Both extension headers and the protocol are checked here */
2488 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2489 chain->stat.bypass_not_tcp++;
2490 return RSC_BYPASS;
2491 }
2492
2493 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2494 if (ip_len < sizeof(struct tcp_header) ||
2495 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2496 - sizeof(struct ip6_header))) {
2497 chain->stat.ip_hacked++;
2498 return RSC_BYPASS;
2499 }
2500
2501 /* Don't handle packets with ecn flag */
2502 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2503 chain->stat.ip_ecn++;
2504 return RSC_BYPASS;
2505 }
2506
2507 return RSC_CANDIDATE;
2508 }
2509
2510 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2511 const uint8_t *buf, size_t size)
2512 {
2513 int32_t ret;
2514 uint16_t hdr_len;
2515 VirtioNetRscChain *chain;
2516 VirtioNetRscUnit unit;
2517
2518 chain = opq;
2519 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2520
2521 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2522                 + sizeof(struct tcp_header))) {
2523 return virtio_net_do_receive(nc, buf, size);
2524 }
2525
2526 virtio_net_rsc_extract_unit6(chain, buf, &unit);
2527 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2528 unit.ip, buf, size)) {
2529 return virtio_net_do_receive(nc, buf, size);
2530 }
2531
2532 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2533 if (ret == RSC_BYPASS) {
2534 return virtio_net_do_receive(nc, buf, size);
2535 } else if (ret == RSC_FINAL) {
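        /* 8 is the offset of ip6_src within the IPv6 header, so the 32-byte
         * compare in drain_flow covers both addresses; the port pair sits
         * right after the 40-byte fixed header */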
2536 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2537 ((hdr_len + sizeof(struct eth_header)) + 8),
2538 VIRTIO_NET_IP6_ADDR_SIZE,
2539 hdr_len + sizeof(struct eth_header)
2540 + sizeof(struct ip6_header));
2541 }
2542
2543 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2544 }
2545
2546 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2547 NetClientState *nc,
2548 uint16_t proto)
2549 {
2550 VirtioNetRscChain *chain;
2551
2552 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2553 return NULL;
2554 }
2555
2556 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2557 if (chain->proto == proto) {
2558 return chain;
2559 }
2560 }
2561
2562 chain = g_malloc(sizeof(*chain));
2563 chain->n = n;
2564 chain->proto = proto;
2565 if (proto == (uint16_t)ETH_P_IP) {
2566 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2567 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2568 } else {
2569 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2570 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2571 }
2572 chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2573 virtio_net_rsc_purge, chain);
2574 memset(&chain->stat, 0, sizeof(chain->stat));
2575
2576 QTAILQ_INIT(&chain->buffers);
2577 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2578
2579 return chain;
2580 }
2581
2582 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2583 const uint8_t *buf,
2584 size_t size)
2585 {
2586 uint16_t proto;
2587 VirtioNetRscChain *chain;
2588 struct eth_header *eth;
2589 VirtIONet *n;
2590
2591 n = qemu_get_nic_opaque(nc);
2592 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2593 return virtio_net_do_receive(nc, buf, size);
2594 }
2595
2596 eth = (struct eth_header *)(buf + n->guest_hdr_len);
2597 proto = htons(eth->h_proto);
2598
2599 chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2600 if (chain) {
2601 chain->stat.received++;
2602 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2603 return virtio_net_rsc_receive4(chain, nc, buf, size);
2604 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2605 return virtio_net_rsc_receive6(chain, nc, buf, size);
2606 }
2607 }
2608 return virtio_net_do_receive(nc, buf, size);
2609 }
2610
2611 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2612 size_t size)
2613 {
2614 VirtIONet *n = qemu_get_nic_opaque(nc);
2615 if ((n->rsc4_enabled || n->rsc6_enabled)) {
2616 return virtio_net_rsc_receive(nc, buf, size);
2617 } else {
2618 return virtio_net_do_receive(nc, buf, size);
2619 }
2620 }
2621
2622 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2623
2624 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2625 {
2626 VirtIONet *n = qemu_get_nic_opaque(nc);
2627 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2628 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2629 int ret;
2630
2631 virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2632 virtio_notify(vdev, q->tx_vq);
2633
2634 g_free(q->async_tx.elem);
2635 q->async_tx.elem = NULL;
2636
2637 virtio_queue_set_notification(q->tx_vq, 1);
2638 ret = virtio_net_flush_tx(q);
2639 if (ret >= n->tx_burst) {
2640 /*
2641          * the flush has been stopped by tx_burst;
2642          * we will not receive a notification for the
2643          * remaining part, so re-schedule
2644 */
2645 virtio_queue_set_notification(q->tx_vq, 0);
2646 if (q->tx_bh) {
2647 replay_bh_schedule_event(q->tx_bh);
2648 } else {
2649 timer_mod(q->tx_timer,
2650 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2651 }
2652 q->tx_waiting = 1;
2653 }
2654 }
2655
2656 /* TX */
2657 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2658 {
2659 VirtIONet *n = q->n;
2660 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2661 VirtQueueElement *elem;
2662 int32_t num_packets = 0;
2663 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2664 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2665 return num_packets;
2666 }
2667
2668 if (q->async_tx.elem) {
2669 virtio_queue_set_notification(q->tx_vq, 0);
2670 return num_packets;
2671 }
2672
2673 for (;;) {
2674 ssize_t ret;
2675 unsigned int out_num;
2676 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2677 struct virtio_net_hdr vhdr;
2678
2679 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2680 if (!elem) {
2681 break;
2682 }
2683
2684 out_num = elem->out_num;
2685 out_sg = elem->out_sg;
2686 if (out_num < 1) {
2687 virtio_error(vdev, "virtio-net header not in first element");
2688 goto detach;
2689 }
2690
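        /*
         * If the guest and host disagree on vnet header endianness, copy the
         * header out, byte-swap it, and rebuild the iov with the swapped
         * copy as element 0, skipping the original header bytes.
         */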
2691 if (n->needs_vnet_hdr_swap) {
2692 if (iov_to_buf(out_sg, out_num, 0, &vhdr, sizeof(vhdr)) <
2693 sizeof(vhdr)) {
2694 virtio_error(vdev, "virtio-net header incorrect");
2695 goto detach;
2696 }
2697 virtio_net_hdr_swap(vdev, &vhdr);
2698 sg2[0].iov_base = &vhdr;
2699 sg2[0].iov_len = sizeof(vhdr);
2700 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, out_sg, out_num,
2701 sizeof(vhdr), -1);
2702 if (out_num == VIRTQUEUE_MAX_SIZE) {
2703 goto drop;
2704 }
2705 out_num += 1;
2706 out_sg = sg2;
2707 }
2708 /*
2709 * If host wants to see the guest header as is, we can
2710 * pass it on unchanged. Otherwise, copy just the parts
2711 * that host is interested in.
2712 */
2713 assert(n->host_hdr_len <= n->guest_hdr_len);
2714 if (n->host_hdr_len != n->guest_hdr_len) {
2715 if (iov_size(out_sg, out_num) < n->guest_hdr_len) {
2716 virtio_error(vdev, "virtio-net header is invalid");
2717 goto detach;
2718 }
2719 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2720 out_sg, out_num,
2721 0, n->host_hdr_len);
2722 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2723 out_sg, out_num,
2724 n->guest_hdr_len, -1);
2725 out_num = sg_num;
2726 out_sg = sg;
2727
2728 if (out_num < 1) {
2729 virtio_error(vdev, "virtio-net nothing to send");
2730 goto detach;
2731 }
2732 }
2733
2734 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2735 out_sg, out_num, virtio_net_tx_complete);
2736 if (ret == 0) {
2737 virtio_queue_set_notification(q->tx_vq, 0);
2738 q->async_tx.elem = elem;
2739 return -EBUSY;
2740 }
2741
2742 drop:
2743 virtqueue_push(q->tx_vq, elem, 0);
2744 virtio_notify(vdev, q->tx_vq);
2745 g_free(elem);
2746
2747 if (++num_packets >= n->tx_burst) {
2748 break;
2749 }
2750 }
2751 return num_packets;
2752
2753 detach:
2754 virtqueue_detach_element(q->tx_vq, elem, 0);
2755 g_free(elem);
2756 return -EINVAL;
2757 }
2758
2759 static void virtio_net_tx_timer(void *opaque);
2760
2761 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2762 {
2763 VirtIONet *n = VIRTIO_NET(vdev);
2764 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2765
2766 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2767 virtio_net_drop_tx_queue_data(vdev, vq);
2768 return;
2769 }
2770
2771 /* This happens when device was stopped but VCPU wasn't. */
2772 if (!vdev->vm_running) {
2773 q->tx_waiting = 1;
2774 return;
2775 }
2776
2777 if (q->tx_waiting) {
2778 /* We already have queued packets, immediately flush */
2779 timer_del(q->tx_timer);
2780 virtio_net_tx_timer(q);
2781 } else {
2782 /* re-arm timer to flush it (and more) on next tick */
2783 timer_mod(q->tx_timer,
2784 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2785 q->tx_waiting = 1;
2786 virtio_queue_set_notification(vq, 0);
2787 }
2788 }
2789
2790 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2791 {
2792 VirtIONet *n = VIRTIO_NET(vdev);
2793 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2794
2795 if (unlikely(n->vhost_started)) {
2796 return;
2797 }
2798
2799 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2800 virtio_net_drop_tx_queue_data(vdev, vq);
2801 return;
2802 }
2803
2804 if (unlikely(q->tx_waiting)) {
2805 return;
2806 }
2807 q->tx_waiting = 1;
2808 /* This happens when device was stopped but VCPU wasn't. */
2809 if (!vdev->vm_running) {
2810 return;
2811 }
2812 virtio_queue_set_notification(vq, 0);
2813 replay_bh_schedule_event(q->tx_bh);
2814 }
2815
2816 static void virtio_net_tx_timer(void *opaque)
2817 {
2818 VirtIONetQueue *q = opaque;
2819 VirtIONet *n = q->n;
2820 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2821 int ret;
2822
2823 /* This happens when device was stopped but BH wasn't. */
2824 if (!vdev->vm_running) {
2825 /* Make sure tx waiting is set, so we'll run when restarted. */
2826 assert(q->tx_waiting);
2827 return;
2828 }
2829
2830 q->tx_waiting = 0;
2831
2832     /* Just in case the driver is not ready any more */
2833 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2834 return;
2835 }
2836
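        /* indirections_len is expected to be a power of two (checked when
         * RSS is configured), so masking selects hash modulo table size */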
2837 ret = virtio_net_flush_tx(q);
2838 if (ret == -EBUSY || ret == -EINVAL) {
2839 return;
2840 }
2841 /*
2842 * If we flush a full burst of packets, assume there are
2843 * more coming and immediately rearm
2844 */
2845 if (ret >= n->tx_burst) {
2846 q->tx_waiting = 1;
2847 timer_mod(q->tx_timer,
2848 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2849 return;
2850 }
2851 /*
2852 * If less than a full burst, re-enable notification and flush
2853 * anything that may have come in while we weren't looking. If
2854 * we find something, assume the guest is still active and rearm
2855 */
2856 virtio_queue_set_notification(q->tx_vq, 1);
2857 ret = virtio_net_flush_tx(q);
2858 if (ret > 0) {
2859 virtio_queue_set_notification(q->tx_vq, 0);
2860 q->tx_waiting = 1;
2861 timer_mod(q->tx_timer,
2862 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2863 }
2864 }
2865
2866 static void virtio_net_tx_bh(void *opaque)
2867 {
2868 VirtIONetQueue *q = opaque;
2869 VirtIONet *n = q->n;
2870 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2871 int32_t ret;
2872
2873 /* This happens when device was stopped but BH wasn't. */
2874 if (!vdev->vm_running) {
2875 /* Make sure tx waiting is set, so we'll run when restarted. */
2876 assert(q->tx_waiting);
2877 return;
2878 }
2879
2880 q->tx_waiting = 0;
2881
2882     /* Just in case the driver is not ready any more */
2883 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2884 return;
2885 }
2886
2887 ret = virtio_net_flush_tx(q);
2888 if (ret == -EBUSY || ret == -EINVAL) {
2889 return; /* Notification re-enable handled by tx_complete or device
2890 * broken */
2891 }
2892
2893 /* If we flush a full burst of packets, assume there are
2894 * more coming and immediately reschedule */
2895 if (ret >= n->tx_burst) {
2896 replay_bh_schedule_event(q->tx_bh);
2897 q->tx_waiting = 1;
2898 return;
2899 }
2900
2901 /* If less than a full burst, re-enable notification and flush
2902 * anything that may have come in while we weren't looking. If
2903 * we find something, assume the guest is still active and reschedule */
2904 virtio_queue_set_notification(q->tx_vq, 1);
2905 ret = virtio_net_flush_tx(q);
2906 if (ret == -EINVAL) {
2907 return;
2908 } else if (ret > 0) {
2909 virtio_queue_set_notification(q->tx_vq, 0);
2910 replay_bh_schedule_event(q->tx_bh);
2911 q->tx_waiting = 1;
2912 }
2913 }
2914
2915 static void virtio_net_add_queue(VirtIONet *n, int index)
2916 {
2917 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2918
2919 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2920 virtio_net_handle_rx);
2921
2922 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2923 n->vqs[index].tx_vq =
2924 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2925 virtio_net_handle_tx_timer);
2926 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2927 virtio_net_tx_timer,
2928 &n->vqs[index]);
2929 } else {
2930 n->vqs[index].tx_vq =
2931 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2932 virtio_net_handle_tx_bh);
2933 n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
2934 &DEVICE(vdev)->mem_reentrancy_guard);
2935 }
2936
2937 n->vqs[index].tx_waiting = 0;
2938 n->vqs[index].n = n;
2939 }
2940
2941 static void virtio_net_del_queue(VirtIONet *n, int index)
2942 {
2943 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2944 VirtIONetQueue *q = &n->vqs[index];
2945 NetClientState *nc = qemu_get_subqueue(n->nic, index);
2946
2947 qemu_purge_queued_packets(nc);
2948
2949 virtio_del_queue(vdev, index * 2);
2950 if (q->tx_timer) {
2951 timer_free(q->tx_timer);
2952 q->tx_timer = NULL;
2953 } else {
2954 qemu_bh_delete(q->tx_bh);
2955 q->tx_bh = NULL;
2956 }
2957 q->tx_waiting = 0;
2958 virtio_del_queue(vdev, index * 2 + 1);
2959 }
2960
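/*
 * Virtqueues are laid out as [rx0, tx0, rx1, tx1, ..., ctrl]: data queues in
 * pairs with the control queue always last, hence the "max * 2 + 1" totals
 * and the "index * 2" arithmetic below.
 */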
2961 static void virtio_net_change_num_queues(VirtIONet *n, int new_num_queues)
2962 {
2963 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2964 int old_num_queues = virtio_get_num_queues(vdev);
2965 int i;
2966
2967 assert(old_num_queues >= 3);
2968 assert(old_num_queues % 2 == 1);
2969
2970 if (old_num_queues == new_num_queues) {
2971 return;
2972 }
2973
2974 /*
2975 * We always need to remove and add ctrl vq if
2976 * old_num_queues != new_num_queues. Remove ctrl_vq first,
2977 * and then we only enter one of the following two loops.
2978 */
2979 virtio_del_queue(vdev, old_num_queues - 1);
2980
2981 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2982 /* new_num_queues < old_num_queues */
2983 virtio_net_del_queue(n, i / 2);
2984 }
2985
2986 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2987 /* new_num_queues > old_num_queues */
2988 virtio_net_add_queue(n, i / 2);
2989 }
2990
2991 /* add ctrl_vq last */
2992 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2993 }
2994
2995 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2996 {
2997 int max = multiqueue ? n->max_queue_pairs : 1;
2998
2999 n->multiqueue = multiqueue;
3000 virtio_net_change_num_queues(n, max * 2 + 1);
3001
3002 virtio_net_set_queue_pairs(n);
3003 }
3004
3005 static int virtio_net_pre_load_queues(VirtIODevice *vdev, uint32_t n)
3006 {
3007 virtio_net_change_num_queues(VIRTIO_NET(vdev), n);
3008
3009 return 0;
3010 }
3011
3012 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
3013 Error **errp)
3014 {
3015 VirtIONet *n = VIRTIO_NET(vdev);
3016 NetClientState *nc = qemu_get_queue(n->nic);
3017 uint32_t supported_hash_types = n->rss_data.supported_hash_types;
3018 uint32_t peer_hash_types = n->rss_data.peer_hash_types;
3019 bool use_own_hash =
3020 (supported_hash_types & VIRTIO_NET_RSS_SUPPORTED_HASHES) ==
3021 supported_hash_types;
3022 bool use_peer_hash =
3023 n->rss_data.peer_hash_available &&
3024 (supported_hash_types & peer_hash_types) == supported_hash_types;
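    /*
     * The device can compute/report hashes itself only if every supported
     * hash type is one QEMU's own (software/eBPF) hashing implements; a
     * vhost peer's hashing is usable only if it covers all supported types.
     */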
3025
3026 /* Firstly sync all virtio-net possible supported features */
3027 features |= n->host_features;
3028
3029 virtio_add_feature(&features, VIRTIO_NET_F_MAC);
3030
3031 if (!peer_has_vnet_hdr(n)) {
3032 virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
3033 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
3034 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
3035 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
3036
3037 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
3038 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
3039 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
3040 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
3041
3042 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
3043 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
3044 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
3045
3046 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
3047 }
3048
3049 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
3050 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
3051 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
3052 }
3053
3054 if (!peer_has_uso(n)) {
3055 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
3056 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
3057 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
3058 }
3059
3060 if (!get_vhost_net(nc->peer)) {
3061 if (!use_own_hash) {
3062 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
3063 virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
3064 } else if (virtio_has_feature(features, VIRTIO_NET_F_RSS)) {
3065 virtio_net_load_ebpf(n, errp);
3066 }
3067
3068 return features;
3069 }
3070
3071 if (!use_peer_hash) {
3072 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
3073
3074 if (!use_own_hash || !virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
3075 if (!virtio_net_load_ebpf(n, errp)) {
3076 return features;
3077 }
3078
3079 virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
3080 }
3081 }
3082
3083 features = vhost_net_get_features(get_vhost_net(nc->peer), features);
3084 vdev->backend_features = features;
3085
3086 if (n->mtu_bypass_backend &&
3087 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
3088 features |= (1ULL << VIRTIO_NET_F_MTU);
3089 }
3090
3091 /*
3092      * Since GUEST_ANNOUNCE is emulated, the feature bit could be set
3093      * without being enabled. This happens in the vDPA case.
3094 *
3095 * Make sure the feature set is not incoherent, as the driver could refuse
3096 * to start.
3097 *
3098 * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
3099      * helping the guest to notify the new location with vDPA devices
3100      * that do not support it.
3101 */
3102 if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
3103 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
3104 }
3105
3106 return features;
3107 }
3108
3109 static int virtio_net_post_load_device(void *opaque, int version_id)
3110 {
3111 VirtIONet *n = opaque;
3112 VirtIODevice *vdev = VIRTIO_DEVICE(n);
3113 int i, link_down;
3114
3115 trace_virtio_net_post_load_device();
3116 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
3117 virtio_vdev_has_feature(vdev,
3118 VIRTIO_F_VERSION_1),
3119 virtio_vdev_has_feature(vdev,
3120 VIRTIO_NET_F_HASH_REPORT));
3121
3122 /* MAC_TABLE_ENTRIES may be different from the saved image */
3123 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
3124 n->mac_table.in_use = 0;
3125 }
3126
3127 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
3128 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3129 }
3130
3131 /*
3132 * curr_guest_offloads will be later overwritten by the
3133 * virtio_set_features_nocheck call done from the virtio_load.
3134 * Here we make sure it is preserved and restored accordingly
3135 * in the virtio_net_post_load_virtio callback.
3136 */
3137 n->saved_guest_offloads = n->curr_guest_offloads;
3138
3139 virtio_net_set_queue_pairs(n);
3140
3141 /* Find the first multicast entry in the saved MAC filter */
3142 for (i = 0; i < n->mac_table.in_use; i++) {
3143 if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3144 break;
3145 }
3146 }
3147 n->mac_table.first_multi = i;
3148
3149     /* nc.link_down can't be migrated, so infer link_down from the
3150      * link status bit in n->status */
3151 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
3152 for (i = 0; i < n->max_queue_pairs; i++) {
3153 qemu_get_subqueue(n->nic, i)->link_down = link_down;
3154 }
3155
3156 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3157 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3158 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3159 QEMU_CLOCK_VIRTUAL,
3160 virtio_net_announce_timer, n);
3161 if (n->announce_timer.round) {
3162 timer_mod(n->announce_timer.tm,
3163 qemu_clock_get_ms(n->announce_timer.type));
3164 } else {
3165 qemu_announce_timer_del(&n->announce_timer, false);
3166 }
3167 }
3168
3169 virtio_net_commit_rss_config(n);
3170 return 0;
3171 }
3172
3173 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3174 {
3175 VirtIONet *n = VIRTIO_NET(vdev);
3176 /*
3177 * The actual needed state is now in saved_guest_offloads,
3178 * see virtio_net_post_load_device for detail.
3179 * Restore it back and apply the desired offloads.
3180 */
3181 n->curr_guest_offloads = n->saved_guest_offloads;
3182 if (peer_has_vnet_hdr(n)) {
3183 virtio_net_apply_guest_offloads(n);
3184 }
3185
3186 return 0;
3187 }
3188
3189 /* tx_waiting field of a VirtIONetQueue */
3190 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3191 .name = "virtio-net-queue-tx_waiting",
3192 .fields = (const VMStateField[]) {
3193 VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3194 VMSTATE_END_OF_LIST()
3195 },
3196 };
3197
3198 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3199 {
3200 return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3201 }
3202
3203 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3204 {
3205 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3206 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3207 }
3208
3209 static bool mac_table_fits(void *opaque, int version_id)
3210 {
3211 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3212 }
3213
3214 static bool mac_table_doesnt_fit(void *opaque, int version_id)
3215 {
3216 return !mac_table_fits(opaque, version_id);
3217 }
3218
3219 /* This temporary type is shared by all the WITH_TMP methods
3220 * although only some fields are used by each.
3221 */
3222 struct VirtIONetMigTmp {
3223 VirtIONet *parent;
3224 VirtIONetQueue *vqs_1;
3225 uint16_t curr_queue_pairs_1;
3226 uint8_t has_ufo;
3227 uint32_t has_vnet_hdr;
3228 };
3229
3230 /* The 2nd and subsequent tx_waiting flags are loaded later than
3231 * the 1st entry in the queue_pairs and only if there's more than one
3232 * entry. We use the tmp mechanism to calculate a temporary
3233 * pointer and count and also validate the count.
3234 */
3235
3236 static int virtio_net_tx_waiting_pre_save(void *opaque)
3237 {
3238 struct VirtIONetMigTmp *tmp = opaque;
3239
3240 tmp->vqs_1 = tmp->parent->vqs + 1;
3241 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3242 if (tmp->parent->curr_queue_pairs == 0) {
3243 tmp->curr_queue_pairs_1 = 0;
3244 }
3245
3246 return 0;
3247 }
3248
3249 static int virtio_net_tx_waiting_pre_load(void *opaque)
3250 {
3251 struct VirtIONetMigTmp *tmp = opaque;
3252
3253 /* Reuse the pointer setup from save */
3254 virtio_net_tx_waiting_pre_save(opaque);
3255
3256 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3257 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3258 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3259
3260 return -EINVAL;
3261 }
3262
3263 return 0; /* all good */
3264 }
3265
3266 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3267 .name = "virtio-net-tx_waiting",
3268 .pre_load = virtio_net_tx_waiting_pre_load,
3269 .pre_save = virtio_net_tx_waiting_pre_save,
3270 .fields = (const VMStateField[]) {
3271 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3272 curr_queue_pairs_1,
3273 vmstate_virtio_net_queue_tx_waiting,
3274 struct VirtIONetQueue),
3275 VMSTATE_END_OF_LIST()
3276 },
3277 };
3278
3279 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3280 * flag set we need to check that we have it
3281 */
3282 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3283 {
3284 struct VirtIONetMigTmp *tmp = opaque;
3285
3286 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3287 error_report("virtio-net: saved image requires TUN_F_UFO support");
3288 return -EINVAL;
3289 }
3290
3291 return 0;
3292 }
3293
3294 static int virtio_net_ufo_pre_save(void *opaque)
3295 {
3296 struct VirtIONetMigTmp *tmp = opaque;
3297
3298 tmp->has_ufo = tmp->parent->has_ufo;
3299
3300 return 0;
3301 }
3302
3303 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3304 .name = "virtio-net-ufo",
3305 .post_load = virtio_net_ufo_post_load,
3306 .pre_save = virtio_net_ufo_pre_save,
3307 .fields = (const VMStateField[]) {
3308 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3309 VMSTATE_END_OF_LIST()
3310 },
3311 };
3312
3313 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3314 * flag set we need to check that we have it
3315 */
3316 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3317 {
3318 struct VirtIONetMigTmp *tmp = opaque;
3319
3320 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3321 error_report("virtio-net: saved image requires vnet_hdr=on");
3322 return -EINVAL;
3323 }
3324
3325 return 0;
3326 }
3327
3328 static int virtio_net_vnet_pre_save(void *opaque)
3329 {
3330 struct VirtIONetMigTmp *tmp = opaque;
3331
3332 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3333
3334 return 0;
3335 }
3336
3337 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3338 .name = "virtio-net-vnet",
3339 .post_load = virtio_net_vnet_post_load,
3340 .pre_save = virtio_net_vnet_pre_save,
3341 .fields = (const VMStateField[]) {
3342 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3343 VMSTATE_END_OF_LIST()
3344 },
3345 };
3346
3347 static int virtio_net_rss_post_load(void *opaque, int version_id)
3348 {
3349 VirtIONet *n = VIRTIO_NET(opaque);
3350
3351 if (version_id == 1) {
3352 n->rss_data.supported_hash_types = VIRTIO_NET_RSS_SUPPORTED_HASHES;
3353 }
3354
3355 return 0;
3356 }
3357
3358 static bool virtio_net_rss_needed(void *opaque)
3359 {
3360 return VIRTIO_NET(opaque)->rss_data.enabled;
3361 }
3362
3363 static const VMStateDescription vmstate_virtio_net_rss = {
3364 .name = "virtio-net-device/rss",
3365 .version_id = 2,
3366 .minimum_version_id = 1,
3367 .post_load = virtio_net_rss_post_load,
3368 .needed = virtio_net_rss_needed,
3369 .fields = (const VMStateField[]) {
3370 VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3371 VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3372 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3373 VMSTATE_UINT32(rss_data.runtime_hash_types, VirtIONet),
3374 VMSTATE_UINT32_V(rss_data.supported_hash_types, VirtIONet, 2),
3375 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3376 VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3377 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3378 VIRTIO_NET_RSS_MAX_KEY_SIZE),
3379 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3380 rss_data.indirections_len, 0,
3381 vmstate_info_uint16, uint16_t),
3382 VMSTATE_END_OF_LIST()
3383 },
3384 };
3385
3386 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3387 {
3388 VirtIONet *n = VIRTIO_NET(vdev);
3389 NetClientState *nc;
3390 struct vhost_net *net;
3391
3392 if (!n->nic) {
3393 return NULL;
3394 }
3395
3396 nc = qemu_get_queue(n->nic);
3397 if (!nc) {
3398 return NULL;
3399 }
3400
3401 net = get_vhost_net(nc->peer);
3402 if (!net) {
3403 return NULL;
3404 }
3405
3406 return &net->dev;
3407 }
3408
3409 static int vhost_user_net_save_state(QEMUFile *f, void *pv, size_t size,
3410 const VMStateField *field,
3411 JSONWriter *vmdesc)
3412 {
3413 VirtIONet *n = pv;
3414 VirtIODevice *vdev = VIRTIO_DEVICE(n);
3415 struct vhost_dev *vhdev;
3416 Error *local_error = NULL;
3417 int ret;
3418
3419 vhdev = virtio_net_get_vhost(vdev);
3420 if (vhdev == NULL) {
3421 error_reportf_err(local_error,
3422 "Error getting vhost back-end of %s device %s: ",
3423 vdev->name, vdev->parent_obj.canonical_path);
3424 return -1;
3425 }
3426
3427 ret = vhost_save_backend_state(vhdev, f, &local_error);
3428 if (ret < 0) {
3429 error_reportf_err(local_error,
3430 "Error saving back-end state of %s device %s: ",
3431 vdev->name, vdev->parent_obj.canonical_path);
3432 return ret;
3433 }
3434
3435 return 0;
3436 }
3437
3438 static int vhost_user_net_load_state(QEMUFile *f, void *pv, size_t size,
3439 const VMStateField *field)
3440 {
3441 VirtIONet *n = pv;
3442 VirtIODevice *vdev = VIRTIO_DEVICE(n);
3443 struct vhost_dev *vhdev;
3444 Error *local_error = NULL;
3445 int ret;
3446
3447 vhdev = virtio_net_get_vhost(vdev);
3448 if (vhdev == NULL) {
3449 error_reportf_err(local_error,
3450 "Error getting vhost back-end of %s device %s: ",
3451 vdev->name, vdev->parent_obj.canonical_path);
3452 return -1;
3453 }
3454
3455 ret = vhost_load_backend_state(vhdev, f, &local_error);
3456 if (ret < 0) {
3457 error_reportf_err(local_error,
3458 "Error loading back-end state of %s device %s: ",
3459 vdev->name, vdev->parent_obj.canonical_path);
3460 return ret;
3461 }
3462
3463 return 0;
3464 }
3465
3466 static bool vhost_user_net_is_internal_migration(void *opaque)
3467 {
3468 VirtIONet *n = opaque;
3469 VirtIODevice *vdev = VIRTIO_DEVICE(n);
3470 struct vhost_dev *vhdev;
3471
3472 vhdev = virtio_net_get_vhost(vdev);
3473 if (vhdev == NULL) {
3474 return false;
3475 }
3476
3477 return vhost_supports_device_state(vhdev);
3478 }
3479
3480 static const VMStateDescription vhost_user_net_backend_state = {
3481 .name = "virtio-net-device/backend",
3482 .version_id = 0,
3483 .needed = vhost_user_net_is_internal_migration,
3484 .fields = (const VMStateField[]) {
3485 {
3486 .name = "backend",
3487 .info = &(const VMStateInfo) {
3488 .name = "virtio-net vhost-user backend state",
3489 .get = vhost_user_net_load_state,
3490 .put = vhost_user_net_save_state,
3491 },
3492 },
3493 VMSTATE_END_OF_LIST()
3494 }
3495 };
3496
3497 static const VMStateDescription vmstate_virtio_net_device = {
3498 .name = "virtio-net-device",
3499 .version_id = VIRTIO_NET_VM_VERSION,
3500 .minimum_version_id = VIRTIO_NET_VM_VERSION,
3501 .post_load = virtio_net_post_load_device,
3502 .fields = (const VMStateField[]) {
3503 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3504 VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3505 vmstate_virtio_net_queue_tx_waiting,
3506 VirtIONetQueue),
3507 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3508 VMSTATE_UINT16(status, VirtIONet),
3509 VMSTATE_UINT8(promisc, VirtIONet),
3510 VMSTATE_UINT8(allmulti, VirtIONet),
3511 VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3512
3513         /* Guarded pair: if it fits we load it, else we throw it away
3514          * - can happen if the source has a larger MAC table; post-load
3515          * sets flags in this case.
3516 */
3517 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3518 0, mac_table_fits, mac_table.in_use,
3519 ETH_ALEN),
3520 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3521 mac_table.in_use, ETH_ALEN),
3522
3523 /* Note: This is an array of uint32's that's always been saved as a
3524 * buffer; hold onto your endiannesses; it's actually used as a bitmap
3525 * but based on the uint.
3526 */
3527 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3528 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3529 vmstate_virtio_net_has_vnet),
3530 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3531 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3532 VMSTATE_UINT8(alluni, VirtIONet),
3533 VMSTATE_UINT8(nomulti, VirtIONet),
3534 VMSTATE_UINT8(nouni, VirtIONet),
3535 VMSTATE_UINT8(nobcast, VirtIONet),
3536 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3537 vmstate_virtio_net_has_ufo),
3538 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3539 vmstate_info_uint16_equal, uint16_t),
3540 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3541 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3542 vmstate_virtio_net_tx_waiting),
3543 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3544 has_ctrl_guest_offloads),
3545 VMSTATE_END_OF_LIST()
3546 },
3547 .subsections = (const VMStateDescription * const []) {
3548 &vmstate_virtio_net_rss,
3549 &vhost_user_net_backend_state,
3550 NULL
3551 }
3552 };
3553
3554 static NetClientInfo net_virtio_info = {
3555 .type = NET_CLIENT_DRIVER_NIC,
3556 .size = sizeof(NICState),
3557 .can_receive = virtio_net_can_receive,
3558 .receive = virtio_net_receive,
3559 .link_status_changed = virtio_net_set_link_status,
3560 .query_rx_filter = virtio_net_query_rxfilter,
3561 .announce = virtio_net_announce,
3562 };
3563
3564 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3565 {
3566 VirtIONet *n = VIRTIO_NET(vdev);
3567 NetClientState *nc;
3568 assert(n->vhost_started);
3569 if (!n->multiqueue && idx == 2) {
3570         /* Must guard against an invalid feature set or a bogus queue
3571          * index set by a malicious guest, or slipped in through a buggy
3572          * migration stream.
3573 */
3574 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3575 qemu_log_mask(LOG_GUEST_ERROR,
3576 "%s: bogus vq index ignored\n", __func__);
3577 return false;
3578 }
3579 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3580 } else {
3581 nc = qemu_get_subqueue(n->nic, vq2q(idx));
3582 }
3583 /*
3584 * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is
3585 * used as the index of the config interrupt. If the vhost backend
3586 * does not support it, this returns false.
3587 */
3588
3589 if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3590 return vhost_net_config_pending(get_vhost_net(nc->peer));
3591 }
3592 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3593 }
3594
3595 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3596 bool mask)
3597 {
3598 VirtIONet *n = VIRTIO_NET(vdev);
3599 NetClientState *nc;
3600 assert(n->vhost_started);
3601 if (!n->multiqueue && idx == 2) {
3602 /* For a single-queue device, vq index 2 is the control vq; guard
3603 * against an invalid feature set or a bogus queue index supplied
3604 * by a malicious guest or smuggled in by a buggy migration stream.
3605 */
3606 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3607 qemu_log_mask(LOG_GUEST_ERROR,
3608 "%s: bogus vq index ignored\n", __func__);
3609 return;
3610 }
3611 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3612 } else {
3613 nc = qemu_get_subqueue(n->nic, vq2q(idx));
3614 }
3615 /*
3616 * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is
3617 * used as the index of the config interrupt. If the vhost backend
3618 * does not support it, this returns without doing anything.
3619 */
3620
3621 if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3622 vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
3623 return;
3624 }
3625 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
3626 }
3627
3628 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3629 {
3630 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3631
3632 n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3633 }
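/*
 * VIRTIO_NET_F_MAC is added unconditionally here, apparently so that
 * the computed config size always covers at least the mac field even
 * when the feature itself was disabled on the command line.
 */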
3634
3635 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3636 const char *type)
3637 {
3638 /*
3639 * The name may be NULL; the netclient name then defaults to "type.x".
3640 */
3641 assert(type != NULL);
3642
3643 g_free(n->netclient_name);
3644 g_free(n->netclient_type);
3645 n->netclient_name = g_strdup(name);
3646 n->netclient_type = g_strdup(type);
3647 }
3648
3649 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3650 {
3651 HotplugHandler *hotplug_ctrl;
3652 PCIDevice *pci_dev;
3653 Error *err = NULL;
3654
3655 hotplug_ctrl = qdev_get_hotplug_handler(dev);
3656 if (hotplug_ctrl) {
3657 pci_dev = PCI_DEVICE(dev);
3658 pci_dev->partially_hotplugged = true;
3659 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3660 if (err) {
3661 error_report_err(err);
3662 return false;
3663 }
3664 } else {
3665 return false;
3666 }
3667 return true;
3668 }
3669
3670 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3671 Error **errp)
3672 {
3673 Error *err = NULL;
3674 HotplugHandler *hotplug_ctrl;
3675 PCIDevice *pdev = PCI_DEVICE(dev);
3676 BusState *primary_bus;
3677
3678 if (!pdev->partially_hotplugged) {
3679 return true;
3680 }
3681 primary_bus = dev->parent_bus;
3682 if (!primary_bus) {
3683 error_setg(errp, "virtio_net: couldn't find primary bus");
3684 return false;
3685 }
3686 qdev_set_parent_bus(dev, primary_bus, &error_abort);
3687 qatomic_set(&n->failover_primary_hidden, false);
3688 hotplug_ctrl = qdev_get_hotplug_handler(dev);
3689 if (hotplug_ctrl) {
3690 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3691 if (err) {
3692 goto out;
3693 }
3694 hotplug_handler_plug(hotplug_ctrl, dev, &err);
3695 }
3696 pdev->partially_hotplugged = false;
3697
3698 out:
3699 error_propagate(errp, err);
3700 return !err;
3701 }
3702
3703 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e)
3704 {
3705 bool should_be_hidden;
3706 Error *err = NULL;
3707 DeviceState *dev = failover_find_primary_device(n);
3708
3709 if (!dev) {
3710 return;
3711 }
3712
3713 should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3714
3715 if (e->type == MIG_EVENT_PRECOPY_SETUP && !should_be_hidden) {
3716 if (failover_unplug_primary(n, dev)) {
3717 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3718 qapi_event_send_unplug_primary(dev->id);
3719 qatomic_set(&n->failover_primary_hidden, true);
3720 } else {
3721 warn_report("couldn't unplug primary device");
3722 }
3723 } else if (e->type == MIG_EVENT_PRECOPY_FAILED) {
3724 /* We already unplugged the device; let's plug it back. */
3725 if (!failover_replug_primary(n, dev, &err)) {
3726 if (err) {
3727 error_report_err(err);
3728 }
3729 }
3730 }
3731 }
3732
3733 static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier,
3734 MigrationEvent *e, Error **errp)
3735 {
3736 VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3737 virtio_net_handle_migration_primary(n, e);
3738 return 0;
3739 }
3740
3741 static bool failover_hide_primary_device(DeviceListener *listener,
3742 const QDict *device_opts,
3743 bool from_json,
3744 Error **errp)
3745 {
3746 VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3747 const char *standby_id;
3748
3749 if (!device_opts) {
3750 return false;
3751 }
3752
3753 if (!qdict_haskey(device_opts, "failover_pair_id")) {
3754 return false;
3755 }
3756
3757 if (!qdict_haskey(device_opts, "id")) {
3758 error_setg(errp, "Device with failover_pair_id needs to have id");
3759 return false;
3760 }
3761
3762 standby_id = qdict_get_str(device_opts, "failover_pair_id");
3763 if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3764 return false;
3765 }
3766
3767 /*
3768 * The hide helper can be called several times for a given device.
3769 * Ensure there is only one primary per virtio-net device, but don't
3770 * clone the qdict again when the helper is called for the same
3771 * device.
3772 */
3773 if (n->primary_opts) {
3774 const char *old, *new;
3775 /* devices with failover_pair_id always have an id */
3776 old = qdict_get_str(n->primary_opts, "id");
3777 new = qdict_get_str(device_opts, "id");
3778 if (strcmp(old, new) != 0) {
3779 error_setg(errp, "Cannot attach more than one primary device to "
3780 "'%s': '%s' and '%s'", n->netclient_name, old, new);
3781 return false;
3782 }
3783 } else {
3784 n->primary_opts = qdict_clone_shallow(device_opts);
3785 n->primary_opts_from_json = from_json;
3786 }
3787
3788 /* failover_primary_hidden is set during feature negotiation */
3789 return qatomic_read(&n->failover_primary_hidden);
3790 }
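/*
 * Illustration only (hypothetical IDs): a failover pair is typically
 * set up on the command line with the standby virtio-net device
 * exposing failover=on and the primary referencing it through
 * failover_pair_id, e.g.:
 *
 *     -device virtio-net-pci,netdev=nd0,id=standby0,failover=on
 *     -device vfio-pci,host=01:00.0,id=primary0,failover_pair_id=standby0
 */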
3791
3792 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3793 {
3794 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3795 VirtIONet *n = VIRTIO_NET(dev);
3796 NetClientState *nc;
3797 int i;
3798
3799 if (n->net_conf.mtu) {
3800 n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3801 }
3802
3803 if (n->net_conf.duplex_str) {
3804 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3805 n->net_conf.duplex = DUPLEX_HALF;
3806 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3807 n->net_conf.duplex = DUPLEX_FULL;
3808 } else {
3809 error_setg(errp, "'duplex' must be 'half' or 'full'");
3810 return;
3811 }
3812 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3813 } else {
3814 n->net_conf.duplex = DUPLEX_UNKNOWN;
3815 }
3816
3817 if (n->net_conf.speed < SPEED_UNKNOWN) {
3818 error_setg(errp, "'speed' must be between 0 and INT_MAX");
3819 return;
3820 }
3821 if (n->net_conf.speed >= 0) {
3822 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3823 }
3824
3825 if (n->failover) {
3826 n->primary_listener.hide_device = failover_hide_primary_device;
3827 qatomic_set(&n->failover_primary_hidden, true);
3828 device_listener_register(&n->primary_listener);
3829 migration_add_notifier(&n->migration_state,
3830 virtio_net_migration_state_notifier);
3831 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3832 }
3833
3834 virtio_net_set_config_size(n, n->host_features);
3835 virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3836
3837 /*
3838 * We set the lower limit on the RX queue size to its historical
3839 * default. Guests that want a smaller ring can always resize it
3840 * without help from us (using virtio 1 and up).
3841 */
3842 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3843 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3844 !is_power_of_2(n->net_conf.rx_queue_size)) {
3845 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3846 "must be a power of 2 between %d and %d.",
3847 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3848 VIRTQUEUE_MAX_SIZE);
3849 virtio_cleanup(vdev);
3850 return;
3851 }
3852
3853 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3854 n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
3855 !is_power_of_2(n->net_conf.tx_queue_size)) {
3856 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3857 "must be a power of 2 between %d and %d",
3858 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3859 virtio_net_max_tx_queue_size(n));
3860 virtio_cleanup(vdev);
3861 return;
3862 }
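/*
 * A minimal sketch of tuning the two checks above from the command
 * line (hypothetical netdev id; both sizes must be powers of 2 inside
 * the validated ranges):
 *
 *     -device virtio-net-pci,netdev=nd0,rx_queue_size=1024,tx_queue_size=512
 */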
3863
3864 n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3865
3866 /*
3867 * Figure out the number of datapath queue pairs, since the backend
3868 * could provide a control queue via its peers as well.
3869 */
3870 if (n->nic_conf.peers.queues) {
3871 for (i = 0; i < n->max_ncs; i++) {
3872 if (n->nic_conf.peers.ncs[i]->is_datapath) {
3873 ++n->max_queue_pairs;
3874 }
3875 }
3876 }
3877 n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3878
3879 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3880 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3881 "must be a positive integer less than %d.",
3882 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3883 virtio_cleanup(vdev);
3884 return;
3885 }
3886 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3887 n->curr_queue_pairs = 1;
3888 n->tx_timeout = n->net_conf.txtimer;
3889
3890 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3891 && strcmp(n->net_conf.tx, "bh")) {
3892 warn_report("virtio-net: "
3893 "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3894 n->net_conf.tx);
3895 error_printf("Defaulting to \"bh\"");
3896 }
3897
3898 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3899 n->net_conf.tx_queue_size);
3900
3901 virtio_net_add_queue(n, 0);
3902
3903 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3904 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3905 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3906 n->status = VIRTIO_NET_S_LINK_UP;
3907 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3908 QEMU_CLOCK_VIRTUAL,
3909 virtio_net_announce_timer, n);
3910 n->announce_timer.round = 0;
3911
3912 if (n->netclient_type) {
3913 /*
3914 * Happens when virtio_net_set_netclient_name() has been called.
3915 */
3916 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3917 n->netclient_type, n->netclient_name,
3918 &dev->mem_reentrancy_guard, n);
3919 } else {
3920 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3921 object_get_typename(OBJECT(dev)), dev->id,
3922 &dev->mem_reentrancy_guard, n);
3923 }
3924
3925 for (i = 0; i < n->max_queue_pairs; i++) {
3926 n->nic->ncs[i].do_not_pad = true;
3927 }
3928
3929 peer_test_vnet_hdr(n);
3930 if (peer_has_vnet_hdr(n)) {
3931 n->host_hdr_len = sizeof(struct virtio_net_hdr);
3932 } else {
3933 n->host_hdr_len = 0;
3934 }
3935
3936 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3937
3938 n->vqs[0].tx_waiting = 0;
3939 n->tx_burst = n->net_conf.txburst;
3940 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3941 n->promisc = 1; /* for compatibility */
3942
3943 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3944
3945 n->vlans = g_malloc0(MAX_VLAN >> 3);
3946 memset(n->vlans, 0xff, MAX_VLAN >> 3);
3947
3948 nc = qemu_get_queue(n->nic);
3949 nc->rxfilter_notify_enabled = 1;
3950
3951 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3952 struct virtio_net_config netcfg = {};
3953 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3954 vhost_net_set_config(get_vhost_net(nc->peer),
3955 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
3956 }
3957 QTAILQ_INIT(&n->rsc_chains);
3958 n->qdev = dev;
3959
3960 net_rx_pkt_init(&n->rx_pkt);
3961
3962 if (qemu_get_vnet_hash_supported_types(qemu_get_queue(n->nic)->peer,
3963 &n->rss_data.peer_hash_types)) {
3964 n->rss_data.peer_hash_available = true;
3965 n->rss_data.supported_hash_types =
3966 n->rss_data.specified_hash_types.on_bits |
3967 (n->rss_data.specified_hash_types.auto_bits &
3968 n->rss_data.peer_hash_types);
3969 } else {
3970 n->rss_data.supported_hash_types =
3971 n->rss_data.specified_hash_types.on_bits |
3972 n->rss_data.specified_hash_types.auto_bits;
3973 }
3974 }
3975
3976 static void virtio_net_device_unrealize(DeviceState *dev)
3977 {
3978 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3979 VirtIONet *n = VIRTIO_NET(dev);
3980 int i, max_queue_pairs;
3981
3982 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3983 virtio_net_unload_ebpf(n);
3984 }
3985
3986 /* This will stop vhost backend if appropriate. */
3987 virtio_net_set_status(vdev, 0);
3988
3989 g_free(n->netclient_name);
3990 n->netclient_name = NULL;
3991 g_free(n->netclient_type);
3992 n->netclient_type = NULL;
3993
3994 g_free(n->mac_table.macs);
3995 g_free(n->vlans);
3996
3997 if (n->failover) {
3998 qobject_unref(n->primary_opts);
3999 device_listener_unregister(&n->primary_listener);
4000 migration_remove_notifier(&n->migration_state);
4001 } else {
4002 assert(n->primary_opts == NULL);
4003 }
4004
4005 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
4006 for (i = 0; i < max_queue_pairs; i++) {
4007 virtio_net_del_queue(n, i);
4008 }
4009 /* also delete the control vq */
4010 virtio_del_queue(vdev, max_queue_pairs * 2);
4011 qemu_announce_timer_del(&n->announce_timer, false);
4012 g_free(n->vqs);
4013 qemu_del_nic(n->nic);
4014 virtio_net_rsc_cleanup(n);
4015 g_free(n->rss_data.indirections_table);
4016 net_rx_pkt_uninit(n->rx_pkt);
4017 virtio_cleanup(vdev);
4018 }
4019
4020 static void virtio_net_reset(VirtIODevice *vdev)
4021 {
4022 VirtIONet *n = VIRTIO_NET(vdev);
4023 int i;
4024
4025 /* Reset back to compatibility mode */
4026 n->promisc = 1;
4027 n->allmulti = 0;
4028 n->alluni = 0;
4029 n->nomulti = 0;
4030 n->nouni = 0;
4031 n->nobcast = 0;
4032 /* multiqueue is disabled by default */
4033 n->curr_queue_pairs = 1;
4034 timer_del(n->announce_timer.tm);
4035 n->announce_timer.round = 0;
4036 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
4037
4038 /* Flush any MAC and VLAN filter table state */
4039 n->mac_table.in_use = 0;
4040 n->mac_table.first_multi = 0;
4041 n->mac_table.multi_overflow = 0;
4042 n->mac_table.uni_overflow = 0;
4043 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
4044 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
4045 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
4046
4047 /* Flush any async TX */
4048 for (i = 0; i < n->max_queue_pairs; i++) {
4049 flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
4050 }
4051
4052 virtio_net_disable_rss(n);
4053 }
4054
4055 static void virtio_net_instance_init(Object *obj)
4056 {
4057 VirtIONet *n = VIRTIO_NET(obj);
4058
4059 /*
4060 * The default config_size is sizeof(struct virtio_net_config).
4061 * Can be overridden with virtio_net_set_config_size.
4062 */
4063 n->config_size = sizeof(struct virtio_net_config);
4064 device_add_bootindex_property(obj, &n->nic_conf.bootindex,
4065 "bootindex", "/ethernet-phy@0",
4066 DEVICE(n));
4067
4068 ebpf_rss_init(&n->ebpf_rss);
4069 }
4070
4071 static int virtio_net_pre_save(void *opaque)
4072 {
4073 VirtIONet *n = opaque;
4074
4075 /* At this point, the backend must be stopped, otherwise
4076 * it might keep writing to memory. */
4077 assert(!n->vhost_started);
4078
4079 return 0;
4080 }
4081
4082 static bool primary_unplug_pending(void *opaque)
4083 {
4084 DeviceState *dev = opaque;
4085 DeviceState *primary;
4086 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
4087 VirtIONet *n = VIRTIO_NET(vdev);
4088
4089 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
4090 return false;
4091 }
4092 primary = failover_find_primary_device(n);
4093 return primary ? primary->pending_deleted_event : false;
4094 }
4095
4096 static bool dev_unplug_pending(void *opaque)
4097 {
4098 DeviceState *dev = opaque;
4099 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
4100
4101 return vdc->primary_unplug_pending(dev);
4102 }
4103
4104 static const VMStateDescription vmstate_virtio_net = {
4105 .name = "virtio-net",
4106 .minimum_version_id = VIRTIO_NET_VM_VERSION,
4107 .version_id = VIRTIO_NET_VM_VERSION,
4108 .fields = (const VMStateField[]) {
4109 VMSTATE_VIRTIO_DEVICE,
4110 VMSTATE_END_OF_LIST()
4111 },
4112 .pre_save = virtio_net_pre_save,
4113 .dev_unplug_pending = dev_unplug_pending,
4114 };
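/*
 * Note: this outer VMStateDescription only wraps the common virtio
 * transport state; the device-specific fields are described by
 * vmstate_virtio_net_device and reached through vdc->vmsd, set in
 * virtio_net_class_init() below.
 */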
4115
4116 static const Property virtio_net_properties[] = {
4117 DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
4118 VIRTIO_NET_F_CSUM, true),
4119 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
4120 VIRTIO_NET_F_GUEST_CSUM, true),
4121 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
4122 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
4123 VIRTIO_NET_F_GUEST_TSO4, true),
4124 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
4125 VIRTIO_NET_F_GUEST_TSO6, true),
4126 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
4127 VIRTIO_NET_F_GUEST_ECN, true),
4128 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
4129 VIRTIO_NET_F_GUEST_UFO, true),
4130 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
4131 VIRTIO_NET_F_GUEST_ANNOUNCE, true),
4132 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
4133 VIRTIO_NET_F_HOST_TSO4, true),
4134 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
4135 VIRTIO_NET_F_HOST_TSO6, true),
4136 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
4137 VIRTIO_NET_F_HOST_ECN, true),
4138 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
4139 VIRTIO_NET_F_HOST_UFO, true),
4140 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
4141 VIRTIO_NET_F_MRG_RXBUF, true),
4142 DEFINE_PROP_BIT64("status", VirtIONet, host_features,
4143 VIRTIO_NET_F_STATUS, true),
4144 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
4145 VIRTIO_NET_F_CTRL_VQ, true),
4146 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
4147 VIRTIO_NET_F_CTRL_RX, true),
4148 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
4149 VIRTIO_NET_F_CTRL_VLAN, true),
4150 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
4151 VIRTIO_NET_F_CTRL_RX_EXTRA, true),
4152 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
4153 VIRTIO_NET_F_CTRL_MAC_ADDR, true),
4154 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
4155 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
4156 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
4157 DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
4158 VIRTIO_NET_F_RSS, false),
4159 DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
4160 VIRTIO_NET_F_HASH_REPORT, false),
4161 DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds,
4162 ebpf_rss_fds, qdev_prop_string, char*),
4163 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
4164 VIRTIO_NET_F_RSC_EXT, false),
4165 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
4166 VIRTIO_NET_RSC_DEFAULT_INTERVAL),
4167 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
4168 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
4169 TX_TIMER_INTERVAL),
4170 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
4171 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
4172 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
4173 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
4174 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
4175 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
4176 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
4177 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
4178 true),
4179 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
4180 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
4181 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
4182 DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
4183 VIRTIO_NET_F_GUEST_USO4, true),
4184 DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
4185 VIRTIO_NET_F_GUEST_USO6, true),
4186 DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
4187 VIRTIO_NET_F_HOST_USO, true),
4188 DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv4", VirtIONet,
4189 rss_data.specified_hash_types,
4190 VIRTIO_NET_HASH_REPORT_IPv4 - 1,
4191 ON_OFF_AUTO_AUTO),
4192 DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp4", VirtIONet,
4193 rss_data.specified_hash_types,
4194 VIRTIO_NET_HASH_REPORT_TCPv4 - 1,
4195 ON_OFF_AUTO_AUTO),
4196 DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp4", VirtIONet,
4197 rss_data.specified_hash_types,
4198 VIRTIO_NET_HASH_REPORT_UDPv4 - 1,
4199 ON_OFF_AUTO_AUTO),
4200 DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv6", VirtIONet,
4201 rss_data.specified_hash_types,
4202 VIRTIO_NET_HASH_REPORT_IPv6 - 1,
4203 ON_OFF_AUTO_AUTO),
4204 DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp6", VirtIONet,
4205 rss_data.specified_hash_types,
4206 VIRTIO_NET_HASH_REPORT_TCPv6 - 1,
4207 ON_OFF_AUTO_AUTO),
4208 DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp6", VirtIONet,
4209 rss_data.specified_hash_types,
4210 VIRTIO_NET_HASH_REPORT_UDPv6 - 1,
4211 ON_OFF_AUTO_AUTO),
4212 DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv6ex", VirtIONet,
4213 rss_data.specified_hash_types,
4214 VIRTIO_NET_HASH_REPORT_IPv6_EX - 1,
4215 ON_OFF_AUTO_AUTO),
4216 DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp6ex", VirtIONet,
4217 rss_data.specified_hash_types,
4218 VIRTIO_NET_HASH_REPORT_TCPv6_EX - 1,
4219 ON_OFF_AUTO_AUTO),
4220 DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp6ex", VirtIONet,
4221 rss_data.specified_hash_types,
4222 VIRTIO_NET_HASH_REPORT_UDPv6_EX - 1,
4223 ON_OFF_AUTO_AUTO),
4224 };
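/*
 * Illustration only (hypothetical netdev id): most properties above map
 * directly to -device options, e.g. enabling multiqueue with RSS and
 * hash reporting:
 *
 *     -device virtio-net-pci,netdev=nd0,mq=on,rss=on,hash=on,host_mtu=9000
 */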
4225
4226 static void virtio_net_class_init(ObjectClass *klass, const void *data)
4227 {
4228 DeviceClass *dc = DEVICE_CLASS(klass);
4229 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
4230
4231 device_class_set_props(dc, virtio_net_properties);
4232 dc->vmsd = &vmstate_virtio_net;
4233 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
4234 vdc->realize = virtio_net_device_realize;
4235 vdc->unrealize = virtio_net_device_unrealize;
4236 vdc->get_config = virtio_net_get_config;
4237 vdc->set_config = virtio_net_set_config;
4238 vdc->get_features = virtio_net_get_features;
4239 vdc->set_features = virtio_net_set_features;
4240 vdc->bad_features = virtio_net_bad_features;
4241 vdc->reset = virtio_net_reset;
4242 vdc->queue_reset = virtio_net_queue_reset;
4243 vdc->queue_enable = virtio_net_queue_enable;
4244 vdc->set_status = virtio_net_set_status;
4245 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
4246 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
4247 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
4248 vdc->pre_load_queues = virtio_net_pre_load_queues;
4249 vdc->post_load = virtio_net_post_load_virtio;
4250 vdc->vmsd = &vmstate_virtio_net_device;
4251 vdc->primary_unplug_pending = primary_unplug_pending;
4252 vdc->get_vhost = virtio_net_get_vhost;
4253 vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
4254 }
4255
4256 static const TypeInfo virtio_net_info = {
4257 .name = TYPE_VIRTIO_NET,
4258 .parent = TYPE_VIRTIO_DEVICE,
4259 .instance_size = sizeof(VirtIONet),
4260 .instance_init = virtio_net_instance_init,
4261 .class_init = virtio_net_class_init,
4262 };
4263
4264 static void virtio_register_types(void)
4265 {
4266 type_register_static(&virtio_net_info);
4267 }
4268
4269 type_init(virtio_register_types)
4270