1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* A network driver using virtio. 3 * 4 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation 5 */ 6 //#define DEBUG 7 #include <linux/netdevice.h> 8 #include <linux/etherdevice.h> 9 #include <linux/ethtool.h> 10 #include <linux/module.h> 11 #include <linux/virtio.h> 12 #include <linux/virtio_net.h> 13 #include <linux/bpf.h> 14 #include <linux/bpf_trace.h> 15 #include <linux/scatterlist.h> 16 #include <linux/if_vlan.h> 17 #include <linux/slab.h> 18 #include <linux/cpu.h> 19 #include <linux/average.h> 20 #include <linux/filter.h> 21 #include <linux/kernel.h> 22 #include <net/route.h> 23 #include <net/xdp.h> 24 #include <net/net_failover.h> 25 26 static int napi_weight = NAPI_POLL_WEIGHT; 27 module_param(napi_weight, int, 0444); 28 29 static bool csum = true, gso = true, napi_tx = true; 30 module_param(csum, bool, 0444); 31 module_param(gso, bool, 0444); 32 module_param(napi_tx, bool, 0644); 33 34 /* FIXME: MTU in config. */ 35 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) 36 #define GOOD_COPY_LEN 128 37 38 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) 39 40 /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */ 41 #define VIRTIO_XDP_HEADROOM 256 42 43 /* Separating two types of XDP xmit */ 44 #define VIRTIO_XDP_TX BIT(0) 45 #define VIRTIO_XDP_REDIR BIT(1) 46 47 #define VIRTIO_XDP_FLAG BIT(0) 48 49 /* RX packet size EWMA. The average packet size is used to determine the packet 50 * buffer size when refilling RX rings. As the entire RX ring may be refilled 51 * at once, the weight is chosen so that the EWMA will be insensitive to short- 52 * term, transient changes in packet size. 53 */ 54 DECLARE_EWMA(pkt_len, 0, 64) 55 56 #define VIRTNET_DRIVER_VERSION "1.0.0" 57 58 static const unsigned long guest_offloads[] = { 59 VIRTIO_NET_F_GUEST_TSO4, 60 VIRTIO_NET_F_GUEST_TSO6, 61 VIRTIO_NET_F_GUEST_ECN, 62 VIRTIO_NET_F_GUEST_UFO, 63 VIRTIO_NET_F_GUEST_CSUM, 64 VIRTIO_NET_F_GUEST_USO4, 65 VIRTIO_NET_F_GUEST_USO6, 66 VIRTIO_NET_F_GUEST_HDRLEN 67 }; 68 69 #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 70 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 71 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 72 (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ 73 (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ 74 (1ULL << VIRTIO_NET_F_GUEST_USO6)) 75 76 struct virtnet_stat_desc { 77 char desc[ETH_GSTRING_LEN]; 78 size_t offset; 79 }; 80 81 struct virtnet_sq_stats { 82 struct u64_stats_sync syncp; 83 u64 packets; 84 u64 bytes; 85 u64 xdp_tx; 86 u64 xdp_tx_drops; 87 u64 kicks; 88 u64 tx_timeouts; 89 }; 90 91 struct virtnet_rq_stats { 92 struct u64_stats_sync syncp; 93 u64 packets; 94 u64 bytes; 95 u64 drops; 96 u64 xdp_packets; 97 u64 xdp_tx; 98 u64 xdp_redirects; 99 u64 xdp_drops; 100 u64 kicks; 101 }; 102 103 #define VIRTNET_SQ_STAT(m) offsetof(struct virtnet_sq_stats, m) 104 #define VIRTNET_RQ_STAT(m) offsetof(struct virtnet_rq_stats, m) 105 106 static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { 107 { "packets", VIRTNET_SQ_STAT(packets) }, 108 { "bytes", VIRTNET_SQ_STAT(bytes) }, 109 { "xdp_tx", VIRTNET_SQ_STAT(xdp_tx) }, 110 { "xdp_tx_drops", VIRTNET_SQ_STAT(xdp_tx_drops) }, 111 { "kicks", VIRTNET_SQ_STAT(kicks) }, 112 { "tx_timeouts", VIRTNET_SQ_STAT(tx_timeouts) }, 113 }; 114 115 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { 116 { "packets", VIRTNET_RQ_STAT(packets) }, 117 { "bytes", VIRTNET_RQ_STAT(bytes) }, 118 { "drops", VIRTNET_RQ_STAT(drops) }, 119 { "xdp_packets", 
VIRTNET_RQ_STAT(xdp_packets) }, 120 { "xdp_tx", VIRTNET_RQ_STAT(xdp_tx) }, 121 { "xdp_redirects", VIRTNET_RQ_STAT(xdp_redirects) }, 122 { "xdp_drops", VIRTNET_RQ_STAT(xdp_drops) }, 123 { "kicks", VIRTNET_RQ_STAT(kicks) }, 124 }; 125 126 #define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc) 127 #define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc) 128 129 /* Internal representation of a send virtqueue */ 130 struct send_queue { 131 /* Virtqueue associated with this send _queue */ 132 struct virtqueue *vq; 133 134 /* TX: fragments + linear part + virtio header */ 135 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 136 137 /* Name of the send queue: output.$index */ 138 char name[16]; 139 140 struct virtnet_sq_stats stats; 141 142 struct napi_struct napi; 143 144 /* Record whether sq is in reset state. */ 145 bool reset; 146 }; 147 148 /* Internal representation of a receive virtqueue */ 149 struct receive_queue { 150 /* Virtqueue associated with this receive_queue */ 151 struct virtqueue *vq; 152 153 struct napi_struct napi; 154 155 struct bpf_prog __rcu *xdp_prog; 156 157 struct virtnet_rq_stats stats; 158 159 /* Chain pages by the private ptr. */ 160 struct page *pages; 161 162 /* Average packet length for mergeable receive buffers. */ 163 struct ewma_pkt_len mrg_avg_pkt_len; 164 165 /* Page frag for packet buffer allocation. */ 166 struct page_frag alloc_frag; 167 168 /* RX: fragments + linear part + virtio header */ 169 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 170 171 /* Min single buffer size for mergeable buffers case. */ 172 unsigned int min_buf_len; 173 174 /* Name of this receive queue: input.$index */ 175 char name[16]; 176 177 struct xdp_rxq_info xdp_rxq; 178 }; 179 180 /* This structure can contain rss message with maximum settings for indirection table and keysize 181 * Note, that default structure that describes RSS configuration virtio_net_rss_config 182 * contains same info but can't handle table values. 183 * In any case, structure would be passed to virtio hw through sg_buf split by parts 184 * because table sizes may be differ according to the device configuration. 185 */ 186 #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 187 #define VIRTIO_NET_RSS_MAX_TABLE_LEN 128 188 struct virtio_net_ctrl_rss { 189 u32 hash_types; 190 u16 indirection_table_mask; 191 u16 unclassified_queue; 192 u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN]; 193 u16 max_tx_vq; 194 u8 hash_key_length; 195 u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; 196 }; 197 198 /* Control VQ buffers: protected by the rtnl lock */ 199 struct control_buf { 200 struct virtio_net_ctrl_hdr hdr; 201 virtio_net_ctrl_ack status; 202 struct virtio_net_ctrl_mq mq; 203 u8 promisc; 204 u8 allmulti; 205 __virtio16 vid; 206 __virtio64 offloads; 207 struct virtio_net_ctrl_rss rss; 208 }; 209 210 struct virtnet_info { 211 struct virtio_device *vdev; 212 struct virtqueue *cvq; 213 struct net_device *dev; 214 struct send_queue *sq; 215 struct receive_queue *rq; 216 unsigned int status; 217 218 /* Max # of queue pairs supported by the device */ 219 u16 max_queue_pairs; 220 221 /* # of queue pairs currently used by the driver */ 222 u16 curr_queue_pairs; 223 224 /* # of XDP queue pairs currently used by the driver */ 225 u16 xdp_queue_pairs; 226 227 /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */ 228 bool xdp_enabled; 229 230 /* I like... big packets and I cannot lie! 
*/ 231 bool big_packets; 232 233 /* number of sg entries allocated for big packets */ 234 unsigned int big_packets_num_skbfrags; 235 236 /* Host will merge rx buffers for big packets (shake it! shake it!) */ 237 bool mergeable_rx_bufs; 238 239 /* Host supports rss and/or hash report */ 240 bool has_rss; 241 bool has_rss_hash_report; 242 u8 rss_key_size; 243 u16 rss_indir_table_size; 244 u32 rss_hash_types_supported; 245 u32 rss_hash_types_saved; 246 247 /* Has control virtqueue */ 248 bool has_cvq; 249 250 /* Host can handle any s/g split between our header and packet data */ 251 bool any_header_sg; 252 253 /* Packet virtio header size */ 254 u8 hdr_len; 255 256 /* Work struct for delayed refilling if we run low on memory. */ 257 struct delayed_work refill; 258 259 /* Is delayed refill enabled? */ 260 bool refill_enabled; 261 262 /* The lock to synchronize the access to refill_enabled */ 263 spinlock_t refill_lock; 264 265 /* Work struct for config space updates */ 266 struct work_struct config_work; 267 268 /* Does the affinity hint is set for virtqueues? */ 269 bool affinity_hint_set; 270 271 /* CPU hotplug instances for online & dead */ 272 struct hlist_node node; 273 struct hlist_node node_dead; 274 275 struct control_buf *ctrl; 276 277 /* Ethtool settings */ 278 u8 duplex; 279 u32 speed; 280 281 /* Interrupt coalescing settings */ 282 u32 tx_usecs; 283 u32 rx_usecs; 284 u32 tx_max_packets; 285 u32 rx_max_packets; 286 287 unsigned long guest_offloads; 288 unsigned long guest_offloads_capable; 289 290 /* failover when STANDBY feature enabled */ 291 struct failover *failover; 292 }; 293 294 struct padded_vnet_hdr { 295 struct virtio_net_hdr_v1_hash hdr; 296 /* 297 * hdr is in a separate sg buffer, and data sg buffer shares same page 298 * with this header sg. This padding makes next sg 16 byte aligned 299 * after the header. 300 */ 301 char padding[12]; 302 }; 303 304 static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf); 305 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); 306 307 static bool is_xdp_frame(void *ptr) 308 { 309 return (unsigned long)ptr & VIRTIO_XDP_FLAG; 310 } 311 312 static void *xdp_to_ptr(struct xdp_frame *ptr) 313 { 314 return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); 315 } 316 317 static struct xdp_frame *ptr_to_xdp(void *ptr) 318 { 319 return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); 320 } 321 322 /* Converting between virtqueue no. and kernel tx/rx queue no. 323 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq 324 */ 325 static int vq2txq(struct virtqueue *vq) 326 { 327 return (vq->index - 1) / 2; 328 } 329 330 static int txq2vq(int txq) 331 { 332 return txq * 2 + 1; 333 } 334 335 static int vq2rxq(struct virtqueue *vq) 336 { 337 return vq->index / 2; 338 } 339 340 static int rxq2vq(int rxq) 341 { 342 return rxq * 2; 343 } 344 345 static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb) 346 { 347 return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb; 348 } 349 350 /* 351 * private is used to chain pages for big packets, put the whole 352 * most recent used list in the beginning for reuse 353 */ 354 static void give_pages(struct receive_queue *rq, struct page *page) 355 { 356 struct page *end; 357 358 /* Find end of list, sew whole thing into vi->rq.pages. 
*/ 359 for (end = page; end->private; end = (struct page *)end->private); 360 end->private = (unsigned long)rq->pages; 361 rq->pages = page; 362 } 363 364 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) 365 { 366 struct page *p = rq->pages; 367 368 if (p) { 369 rq->pages = (struct page *)p->private; 370 /* clear private here, it is used to chain pages */ 371 p->private = 0; 372 } else 373 p = alloc_page(gfp_mask); 374 return p; 375 } 376 377 static void enable_delayed_refill(struct virtnet_info *vi) 378 { 379 spin_lock_bh(&vi->refill_lock); 380 vi->refill_enabled = true; 381 spin_unlock_bh(&vi->refill_lock); 382 } 383 384 static void disable_delayed_refill(struct virtnet_info *vi) 385 { 386 spin_lock_bh(&vi->refill_lock); 387 vi->refill_enabled = false; 388 spin_unlock_bh(&vi->refill_lock); 389 } 390 391 static void virtqueue_napi_schedule(struct napi_struct *napi, 392 struct virtqueue *vq) 393 { 394 if (napi_schedule_prep(napi)) { 395 virtqueue_disable_cb(vq); 396 __napi_schedule(napi); 397 } 398 } 399 400 static void virtqueue_napi_complete(struct napi_struct *napi, 401 struct virtqueue *vq, int processed) 402 { 403 int opaque; 404 405 opaque = virtqueue_enable_cb_prepare(vq); 406 if (napi_complete_done(napi, processed)) { 407 if (unlikely(virtqueue_poll(vq, opaque))) 408 virtqueue_napi_schedule(napi, vq); 409 } else { 410 virtqueue_disable_cb(vq); 411 } 412 } 413 414 static void skb_xmit_done(struct virtqueue *vq) 415 { 416 struct virtnet_info *vi = vq->vdev->priv; 417 struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi; 418 419 /* Suppress further interrupts. */ 420 virtqueue_disable_cb(vq); 421 422 if (napi->weight) 423 virtqueue_napi_schedule(napi, vq); 424 else 425 /* We were probably waiting for more output buffers. 
*/ 426 netif_wake_subqueue(vi->dev, vq2txq(vq)); 427 } 428 429 #define MRG_CTX_HEADER_SHIFT 22 430 static void *mergeable_len_to_ctx(unsigned int truesize, 431 unsigned int headroom) 432 { 433 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); 434 } 435 436 static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) 437 { 438 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; 439 } 440 441 static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) 442 { 443 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); 444 } 445 446 /* Called from bottom half context */ 447 static struct sk_buff *page_to_skb(struct virtnet_info *vi, 448 struct receive_queue *rq, 449 struct page *page, unsigned int offset, 450 unsigned int len, unsigned int truesize) 451 { 452 struct sk_buff *skb; 453 struct virtio_net_hdr_mrg_rxbuf *hdr; 454 unsigned int copy, hdr_len, hdr_padded_len; 455 struct page *page_to_free = NULL; 456 int tailroom, shinfo_size; 457 char *p, *hdr_p, *buf; 458 459 p = page_address(page) + offset; 460 hdr_p = p; 461 462 hdr_len = vi->hdr_len; 463 if (vi->mergeable_rx_bufs) 464 hdr_padded_len = hdr_len; 465 else 466 hdr_padded_len = sizeof(struct padded_vnet_hdr); 467 468 buf = p; 469 len -= hdr_len; 470 offset += hdr_padded_len; 471 p += hdr_padded_len; 472 tailroom = truesize - hdr_padded_len - len; 473 474 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 475 476 /* copy small packet so we can reuse these pages */ 477 if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) { 478 skb = build_skb(buf, truesize); 479 if (unlikely(!skb)) 480 return NULL; 481 482 skb_reserve(skb, p - buf); 483 skb_put(skb, len); 484 485 page = (struct page *)page->private; 486 if (page) 487 give_pages(rq, page); 488 goto ok; 489 } 490 491 /* copy small packet so we can reuse these pages for small data */ 492 skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN); 493 if (unlikely(!skb)) 494 return NULL; 495 496 /* Copy all frame if it fits skb->head, otherwise 497 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. 498 */ 499 if (len <= skb_tailroom(skb)) 500 copy = len; 501 else 502 copy = ETH_HLEN; 503 skb_put_data(skb, p, copy); 504 505 len -= copy; 506 offset += copy; 507 508 if (vi->mergeable_rx_bufs) { 509 if (len) 510 skb_add_rx_frag(skb, 0, page, offset, len, truesize); 511 else 512 page_to_free = page; 513 goto ok; 514 } 515 516 /* 517 * Verify that we can indeed put this data into a skb. 518 * This is here to handle cases when the device erroneously 519 * tries to receive more than is possible. This is usually 520 * the case of a broken device. 
521 */ 522 if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { 523 net_dbg_ratelimited("%s: too much data\n", skb->dev->name); 524 dev_kfree_skb(skb); 525 return NULL; 526 } 527 BUG_ON(offset >= PAGE_SIZE); 528 while (len) { 529 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); 530 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, 531 frag_size, truesize); 532 len -= frag_size; 533 page = (struct page *)page->private; 534 offset = 0; 535 } 536 537 if (page) 538 give_pages(rq, page); 539 540 ok: 541 hdr = skb_vnet_hdr(skb); 542 memcpy(hdr, hdr_p, hdr_len); 543 if (page_to_free) 544 put_page(page_to_free); 545 546 return skb; 547 } 548 549 static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, 550 struct send_queue *sq, 551 struct xdp_frame *xdpf) 552 { 553 struct virtio_net_hdr_mrg_rxbuf *hdr; 554 struct skb_shared_info *shinfo; 555 u8 nr_frags = 0; 556 int err, i; 557 558 if (unlikely(xdpf->headroom < vi->hdr_len)) 559 return -EOVERFLOW; 560 561 if (unlikely(xdp_frame_has_frags(xdpf))) { 562 shinfo = xdp_get_shared_info_from_frame(xdpf); 563 nr_frags = shinfo->nr_frags; 564 } 565 566 /* In wrapping function virtnet_xdp_xmit(), we need to free 567 * up the pending old buffers, where we need to calculate the 568 * position of skb_shared_info in xdp_get_frame_len() and 569 * xdp_return_frame(), which will involve to xdpf->data and 570 * xdpf->headroom. Therefore, we need to update the value of 571 * headroom synchronously here. 572 */ 573 xdpf->headroom -= vi->hdr_len; 574 xdpf->data -= vi->hdr_len; 575 /* Zero header and leave csum up to XDP layers */ 576 hdr = xdpf->data; 577 memset(hdr, 0, vi->hdr_len); 578 xdpf->len += vi->hdr_len; 579 580 sg_init_table(sq->sg, nr_frags + 1); 581 sg_set_buf(sq->sg, xdpf->data, xdpf->len); 582 for (i = 0; i < nr_frags; i++) { 583 skb_frag_t *frag = &shinfo->frags[i]; 584 585 sg_set_page(&sq->sg[i + 1], skb_frag_page(frag), 586 skb_frag_size(frag), skb_frag_off(frag)); 587 } 588 589 err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1, 590 xdp_to_ptr(xdpf), GFP_ATOMIC); 591 if (unlikely(err)) 592 return -ENOSPC; /* Caller handle free/refcnt */ 593 594 return 0; 595 } 596 597 /* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on 598 * the current cpu, so it does not need to be locked. 599 * 600 * Here we use marco instead of inline functions because we have to deal with 601 * three issues at the same time: 1. the choice of sq. 2. judge and execute the 602 * lock/unlock of txq 3. make sparse happy. It is difficult for two inline 603 * functions to perfectly solve these three problems at the same time. 
604 */ 605 #define virtnet_xdp_get_sq(vi) ({ \ 606 int cpu = smp_processor_id(); \ 607 struct netdev_queue *txq; \ 608 typeof(vi) v = (vi); \ 609 unsigned int qp; \ 610 \ 611 if (v->curr_queue_pairs > nr_cpu_ids) { \ 612 qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ 613 qp += cpu; \ 614 txq = netdev_get_tx_queue(v->dev, qp); \ 615 __netif_tx_acquire(txq); \ 616 } else { \ 617 qp = cpu % v->curr_queue_pairs; \ 618 txq = netdev_get_tx_queue(v->dev, qp); \ 619 __netif_tx_lock(txq, cpu); \ 620 } \ 621 v->sq + qp; \ 622 }) 623 624 #define virtnet_xdp_put_sq(vi, q) { \ 625 struct netdev_queue *txq; \ 626 typeof(vi) v = (vi); \ 627 \ 628 txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ 629 if (v->curr_queue_pairs > nr_cpu_ids) \ 630 __netif_tx_release(txq); \ 631 else \ 632 __netif_tx_unlock(txq); \ 633 } 634 635 static int virtnet_xdp_xmit(struct net_device *dev, 636 int n, struct xdp_frame **frames, u32 flags) 637 { 638 struct virtnet_info *vi = netdev_priv(dev); 639 struct receive_queue *rq = vi->rq; 640 struct bpf_prog *xdp_prog; 641 struct send_queue *sq; 642 unsigned int len; 643 int packets = 0; 644 int bytes = 0; 645 int nxmit = 0; 646 int kicks = 0; 647 void *ptr; 648 int ret; 649 int i; 650 651 /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this 652 * indicate XDP resources have been successfully allocated. 653 */ 654 xdp_prog = rcu_access_pointer(rq->xdp_prog); 655 if (!xdp_prog) 656 return -ENXIO; 657 658 sq = virtnet_xdp_get_sq(vi); 659 660 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { 661 ret = -EINVAL; 662 goto out; 663 } 664 665 /* Free up any pending old buffers before queueing new ones. */ 666 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { 667 if (likely(is_xdp_frame(ptr))) { 668 struct xdp_frame *frame = ptr_to_xdp(ptr); 669 670 bytes += xdp_get_frame_len(frame); 671 xdp_return_frame(frame); 672 } else { 673 struct sk_buff *skb = ptr; 674 675 bytes += skb->len; 676 napi_consume_skb(skb, false); 677 } 678 packets++; 679 } 680 681 for (i = 0; i < n; i++) { 682 struct xdp_frame *xdpf = frames[i]; 683 684 if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) 685 break; 686 nxmit++; 687 } 688 ret = nxmit; 689 690 if (flags & XDP_XMIT_FLUSH) { 691 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) 692 kicks = 1; 693 } 694 out: 695 u64_stats_update_begin(&sq->stats.syncp); 696 sq->stats.bytes += bytes; 697 sq->stats.packets += packets; 698 sq->stats.xdp_tx += n; 699 sq->stats.xdp_tx_drops += n - nxmit; 700 sq->stats.kicks += kicks; 701 u64_stats_update_end(&sq->stats.syncp); 702 703 virtnet_xdp_put_sq(vi, sq); 704 return ret; 705 } 706 707 static unsigned int virtnet_get_headroom(struct virtnet_info *vi) 708 { 709 return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0; 710 } 711 712 /* We copy the packet for XDP in the following cases: 713 * 714 * 1) Packet is scattered across multiple rx buffers. 715 * 2) Headroom space is insufficient. 716 * 717 * This is inefficient but it's a temporary condition that 718 * we hit right after XDP is enabled and until queue is refilled 719 * with large buffers with sufficient headroom - so it should affect 720 * at most queue size packets. 721 * Afterwards, the conditions to enable 722 * XDP should preclude the underlying device from sending packets 723 * across multiple buffers (num_buf > 1), and we make sure buffers 724 * have enough headroom. 
725 */ 726 static struct page *xdp_linearize_page(struct receive_queue *rq, 727 int *num_buf, 728 struct page *p, 729 int offset, 730 int page_off, 731 unsigned int *len) 732 { 733 struct page *page = alloc_page(GFP_ATOMIC); 734 735 if (!page) 736 return NULL; 737 738 memcpy(page_address(page) + page_off, page_address(p) + offset, *len); 739 page_off += *len; 740 741 while (--*num_buf) { 742 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 743 unsigned int buflen; 744 void *buf; 745 int off; 746 747 buf = virtqueue_get_buf(rq->vq, &buflen); 748 if (unlikely(!buf)) 749 goto err_buf; 750 751 p = virt_to_head_page(buf); 752 off = buf - page_address(p); 753 754 /* guard against a misconfigured or uncooperative backend that 755 * is sending packet larger than the MTU. 756 */ 757 if ((page_off + buflen + tailroom) > PAGE_SIZE) { 758 put_page(p); 759 goto err_buf; 760 } 761 762 memcpy(page_address(page) + page_off, 763 page_address(p) + off, buflen); 764 page_off += buflen; 765 put_page(p); 766 } 767 768 /* Headroom does not contribute to packet length */ 769 *len = page_off - VIRTIO_XDP_HEADROOM; 770 return page; 771 err_buf: 772 __free_pages(page, 0); 773 return NULL; 774 } 775 776 static struct sk_buff *receive_small(struct net_device *dev, 777 struct virtnet_info *vi, 778 struct receive_queue *rq, 779 void *buf, void *ctx, 780 unsigned int len, 781 unsigned int *xdp_xmit, 782 struct virtnet_rq_stats *stats) 783 { 784 struct sk_buff *skb; 785 struct bpf_prog *xdp_prog; 786 unsigned int xdp_headroom = (unsigned long)ctx; 787 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom; 788 unsigned int headroom = vi->hdr_len + header_offset; 789 unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 790 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 791 struct page *page = virt_to_head_page(buf); 792 unsigned int delta = 0; 793 struct page *xdp_page; 794 int err; 795 unsigned int metasize = 0; 796 797 len -= vi->hdr_len; 798 stats->bytes += len; 799 800 if (unlikely(len > GOOD_PACKET_LEN)) { 801 pr_debug("%s: rx error: len %u exceeds max size %d\n", 802 dev->name, len, GOOD_PACKET_LEN); 803 dev->stats.rx_length_errors++; 804 goto err; 805 } 806 807 if (likely(!vi->xdp_enabled)) { 808 xdp_prog = NULL; 809 goto skip_xdp; 810 } 811 812 rcu_read_lock(); 813 xdp_prog = rcu_dereference(rq->xdp_prog); 814 if (xdp_prog) { 815 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; 816 struct xdp_frame *xdpf; 817 struct xdp_buff xdp; 818 void *orig_data; 819 u32 act; 820 821 if (unlikely(hdr->hdr.gso_type)) 822 goto err_xdp; 823 824 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { 825 int offset = buf - page_address(page) + header_offset; 826 unsigned int tlen = len + vi->hdr_len; 827 int num_buf = 1; 828 829 xdp_headroom = virtnet_get_headroom(vi); 830 header_offset = VIRTNET_RX_PAD + xdp_headroom; 831 headroom = vi->hdr_len + header_offset; 832 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 833 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 834 xdp_page = xdp_linearize_page(rq, &num_buf, page, 835 offset, header_offset, 836 &tlen); 837 if (!xdp_page) 838 goto err_xdp; 839 840 buf = page_address(xdp_page); 841 put_page(page); 842 page = xdp_page; 843 } 844 845 xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); 846 xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len, 847 xdp_headroom, len, true); 848 orig_data = xdp.data; 849 act = bpf_prog_run_xdp(xdp_prog, &xdp); 850 stats->xdp_packets++; 851 852 switch (act) { 853 case XDP_PASS: 854 /* Recalculate length in 
case bpf program changed it */ 855 delta = orig_data - xdp.data; 856 len = xdp.data_end - xdp.data; 857 metasize = xdp.data - xdp.data_meta; 858 break; 859 case XDP_TX: 860 stats->xdp_tx++; 861 xdpf = xdp_convert_buff_to_frame(&xdp); 862 if (unlikely(!xdpf)) 863 goto err_xdp; 864 err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); 865 if (unlikely(!err)) { 866 xdp_return_frame_rx_napi(xdpf); 867 } else if (unlikely(err < 0)) { 868 trace_xdp_exception(vi->dev, xdp_prog, act); 869 goto err_xdp; 870 } 871 *xdp_xmit |= VIRTIO_XDP_TX; 872 rcu_read_unlock(); 873 goto xdp_xmit; 874 case XDP_REDIRECT: 875 stats->xdp_redirects++; 876 err = xdp_do_redirect(dev, &xdp, xdp_prog); 877 if (err) 878 goto err_xdp; 879 *xdp_xmit |= VIRTIO_XDP_REDIR; 880 rcu_read_unlock(); 881 goto xdp_xmit; 882 default: 883 bpf_warn_invalid_xdp_action(vi->dev, xdp_prog, act); 884 fallthrough; 885 case XDP_ABORTED: 886 trace_xdp_exception(vi->dev, xdp_prog, act); 887 goto err_xdp; 888 case XDP_DROP: 889 goto err_xdp; 890 } 891 } 892 rcu_read_unlock(); 893 894 skip_xdp: 895 skb = build_skb(buf, buflen); 896 if (!skb) 897 goto err; 898 skb_reserve(skb, headroom - delta); 899 skb_put(skb, len); 900 if (!xdp_prog) { 901 buf += header_offset; 902 memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); 903 } /* keep zeroed vnet hdr since XDP is loaded */ 904 905 if (metasize) 906 skb_metadata_set(skb, metasize); 907 908 return skb; 909 910 err_xdp: 911 rcu_read_unlock(); 912 stats->xdp_drops++; 913 err: 914 stats->drops++; 915 put_page(page); 916 xdp_xmit: 917 return NULL; 918 } 919 920 static struct sk_buff *receive_big(struct net_device *dev, 921 struct virtnet_info *vi, 922 struct receive_queue *rq, 923 void *buf, 924 unsigned int len, 925 struct virtnet_rq_stats *stats) 926 { 927 struct page *page = buf; 928 struct sk_buff *skb = 929 page_to_skb(vi, rq, page, 0, len, PAGE_SIZE); 930 931 stats->bytes += len - vi->hdr_len; 932 if (unlikely(!skb)) 933 goto err; 934 935 return skb; 936 937 err: 938 stats->drops++; 939 give_pages(rq, page); 940 return NULL; 941 } 942 943 /* Why not use xdp_build_skb_from_frame() ? 944 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in 945 * virtio-net there are 2 points that do not match its requirements: 946 * 1. The size of the prefilled buffer is not fixed before xdp is set. 947 * 2. xdp_build_skb_from_frame() does more checks that we don't need, 948 * like eth_type_trans() (which virtio-net does in receive_buf()). 949 */ 950 static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, 951 struct virtnet_info *vi, 952 struct xdp_buff *xdp, 953 unsigned int xdp_frags_truesz) 954 { 955 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 956 unsigned int headroom, data_len; 957 struct sk_buff *skb; 958 int metasize; 959 u8 nr_frags; 960 961 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 962 pr_debug("Error building skb as missing reserved tailroom for xdp"); 963 return NULL; 964 } 965 966 if (unlikely(xdp_buff_has_frags(xdp))) 967 nr_frags = sinfo->nr_frags; 968 969 skb = build_skb(xdp->data_hard_start, xdp->frame_sz); 970 if (unlikely(!skb)) 971 return NULL; 972 973 headroom = xdp->data - xdp->data_hard_start; 974 data_len = xdp->data_end - xdp->data; 975 skb_reserve(skb, headroom); 976 __skb_put(skb, data_len); 977 978 metasize = xdp->data - xdp->data_meta; 979 metasize = metasize > 0 ? 
metasize : 0; 980 if (metasize) 981 skb_metadata_set(skb, metasize); 982 983 if (unlikely(xdp_buff_has_frags(xdp))) 984 xdp_update_skb_shared_info(skb, nr_frags, 985 sinfo->xdp_frags_size, 986 xdp_frags_truesz, 987 xdp_buff_is_frag_pfmemalloc(xdp)); 988 989 return skb; 990 } 991 992 /* TODO: build xdp in big mode */ 993 static int virtnet_build_xdp_buff_mrg(struct net_device *dev, 994 struct virtnet_info *vi, 995 struct receive_queue *rq, 996 struct xdp_buff *xdp, 997 void *buf, 998 unsigned int len, 999 unsigned int frame_sz, 1000 int *num_buf, 1001 unsigned int *xdp_frags_truesize, 1002 struct virtnet_rq_stats *stats) 1003 { 1004 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 1005 unsigned int headroom, tailroom, room; 1006 unsigned int truesize, cur_frag_size; 1007 struct skb_shared_info *shinfo; 1008 unsigned int xdp_frags_truesz = 0; 1009 struct page *page; 1010 skb_frag_t *frag; 1011 int offset; 1012 void *ctx; 1013 1014 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 1015 xdp_prepare_buff(xdp, buf - VIRTIO_XDP_HEADROOM, 1016 VIRTIO_XDP_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); 1017 1018 if (!*num_buf) 1019 return 0; 1020 1021 if (*num_buf > 1) { 1022 /* If we want to build multi-buffer xdp, we need 1023 * to specify that the flags of xdp_buff have the 1024 * XDP_FLAGS_HAS_FRAG bit. 1025 */ 1026 if (!xdp_buff_has_frags(xdp)) 1027 xdp_buff_set_frags_flag(xdp); 1028 1029 shinfo = xdp_get_shared_info_from_buff(xdp); 1030 shinfo->nr_frags = 0; 1031 shinfo->xdp_frags_size = 0; 1032 } 1033 1034 if (*num_buf > MAX_SKB_FRAGS + 1) 1035 return -EINVAL; 1036 1037 while (--*num_buf > 0) { 1038 buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx); 1039 if (unlikely(!buf)) { 1040 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1041 dev->name, *num_buf, 1042 virtio16_to_cpu(vi->vdev, hdr->num_buffers)); 1043 dev->stats.rx_length_errors++; 1044 return -EINVAL; 1045 } 1046 1047 stats->bytes += len; 1048 page = virt_to_head_page(buf); 1049 offset = buf - page_address(page); 1050 1051 truesize = mergeable_ctx_to_truesize(ctx); 1052 headroom = mergeable_ctx_to_headroom(ctx); 1053 tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 1054 room = SKB_DATA_ALIGN(headroom + tailroom); 1055 1056 cur_frag_size = truesize; 1057 xdp_frags_truesz += cur_frag_size; 1058 if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { 1059 put_page(page); 1060 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 1061 dev->name, len, (unsigned long)(truesize - room)); 1062 dev->stats.rx_length_errors++; 1063 return -EINVAL; 1064 } 1065 1066 frag = &shinfo->frags[shinfo->nr_frags++]; 1067 __skb_frag_set_page(frag, page); 1068 skb_frag_off_set(frag, offset); 1069 skb_frag_size_set(frag, len); 1070 if (page_is_pfmemalloc(page)) 1071 xdp_buff_set_frag_pfmemalloc(xdp); 1072 1073 shinfo->xdp_frags_size += len; 1074 } 1075 1076 *xdp_frags_truesize = xdp_frags_truesz; 1077 return 0; 1078 } 1079 1080 static struct sk_buff *receive_mergeable(struct net_device *dev, 1081 struct virtnet_info *vi, 1082 struct receive_queue *rq, 1083 void *buf, 1084 void *ctx, 1085 unsigned int len, 1086 unsigned int *xdp_xmit, 1087 struct virtnet_rq_stats *stats) 1088 { 1089 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 1090 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 1091 struct page *page = virt_to_head_page(buf); 1092 int offset = buf - page_address(page); 1093 struct sk_buff *head_skb, *curr_skb; 1094 struct bpf_prog *xdp_prog; 1095 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 1096 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 1097 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 1098 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 1099 unsigned int frame_sz, xdp_room; 1100 int err; 1101 1102 head_skb = NULL; 1103 stats->bytes += len - vi->hdr_len; 1104 1105 if (unlikely(len > truesize - room)) { 1106 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 1107 dev->name, len, (unsigned long)(truesize - room)); 1108 dev->stats.rx_length_errors++; 1109 goto err_skb; 1110 } 1111 1112 if (likely(!vi->xdp_enabled)) { 1113 xdp_prog = NULL; 1114 goto skip_xdp; 1115 } 1116 1117 rcu_read_lock(); 1118 xdp_prog = rcu_dereference(rq->xdp_prog); 1119 if (xdp_prog) { 1120 unsigned int xdp_frags_truesz = 0; 1121 struct skb_shared_info *shinfo; 1122 struct xdp_frame *xdpf; 1123 struct page *xdp_page; 1124 struct xdp_buff xdp; 1125 void *data; 1126 u32 act; 1127 int i; 1128 1129 /* Transient failure which in theory could occur if 1130 * in-flight packets from before XDP was enabled reach 1131 * the receive path after XDP is loaded. 1132 */ 1133 if (unlikely(hdr->hdr.gso_type)) 1134 goto err_xdp; 1135 1136 /* Now XDP core assumes frag size is PAGE_SIZE, but buffers 1137 * with headroom may add hole in truesize, which 1138 * make their length exceed PAGE_SIZE. So we disabled the 1139 * hole mechanism for xdp. See add_recvbuf_mergeable(). 1140 */ 1141 frame_sz = truesize; 1142 1143 /* This happens when headroom is not enough because 1144 * of the buffer was prefilled before XDP is set. 1145 * This should only happen for the first several packets. 1146 * In fact, vq reset can be used here to help us clean up 1147 * the prefilled buffers, but many existing devices do not 1148 * support it, and we don't want to bother users who are 1149 * using xdp normally. 
1150 */ 1151 if (!xdp_prog->aux->xdp_has_frags && 1152 (num_buf > 1 || headroom < virtnet_get_headroom(vi))) { 1153 /* linearize data for XDP */ 1154 xdp_page = xdp_linearize_page(rq, &num_buf, 1155 page, offset, 1156 VIRTIO_XDP_HEADROOM, 1157 &len); 1158 frame_sz = PAGE_SIZE; 1159 1160 if (!xdp_page) 1161 goto err_xdp; 1162 offset = VIRTIO_XDP_HEADROOM; 1163 } else if (unlikely(headroom < virtnet_get_headroom(vi))) { 1164 xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + 1165 sizeof(struct skb_shared_info)); 1166 if (len + xdp_room > PAGE_SIZE) 1167 goto err_xdp; 1168 1169 xdp_page = alloc_page(GFP_ATOMIC); 1170 if (!xdp_page) 1171 goto err_xdp; 1172 1173 memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM, 1174 page_address(page) + offset, len); 1175 frame_sz = PAGE_SIZE; 1176 offset = VIRTIO_XDP_HEADROOM; 1177 } else { 1178 xdp_page = page; 1179 } 1180 1181 data = page_address(xdp_page) + offset; 1182 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, 1183 &num_buf, &xdp_frags_truesz, stats); 1184 if (unlikely(err)) 1185 goto err_xdp_frags; 1186 1187 act = bpf_prog_run_xdp(xdp_prog, &xdp); 1188 stats->xdp_packets++; 1189 1190 switch (act) { 1191 case XDP_PASS: 1192 if (unlikely(xdp_page != page)) 1193 put_page(page); 1194 head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); 1195 rcu_read_unlock(); 1196 return head_skb; 1197 case XDP_TX: 1198 stats->xdp_tx++; 1199 xdpf = xdp_convert_buff_to_frame(&xdp); 1200 if (unlikely(!xdpf)) { 1201 netdev_dbg(dev, "convert buff to frame failed for xdp\n"); 1202 goto err_xdp_frags; 1203 } 1204 err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); 1205 if (unlikely(!err)) { 1206 xdp_return_frame_rx_napi(xdpf); 1207 } else if (unlikely(err < 0)) { 1208 trace_xdp_exception(vi->dev, xdp_prog, act); 1209 goto err_xdp_frags; 1210 } 1211 *xdp_xmit |= VIRTIO_XDP_TX; 1212 if (unlikely(xdp_page != page)) 1213 put_page(page); 1214 rcu_read_unlock(); 1215 goto xdp_xmit; 1216 case XDP_REDIRECT: 1217 stats->xdp_redirects++; 1218 err = xdp_do_redirect(dev, &xdp, xdp_prog); 1219 if (err) 1220 goto err_xdp_frags; 1221 *xdp_xmit |= VIRTIO_XDP_REDIR; 1222 if (unlikely(xdp_page != page)) 1223 put_page(page); 1224 rcu_read_unlock(); 1225 goto xdp_xmit; 1226 default: 1227 bpf_warn_invalid_xdp_action(vi->dev, xdp_prog, act); 1228 fallthrough; 1229 case XDP_ABORTED: 1230 trace_xdp_exception(vi->dev, xdp_prog, act); 1231 fallthrough; 1232 case XDP_DROP: 1233 goto err_xdp_frags; 1234 } 1235 err_xdp_frags: 1236 if (unlikely(xdp_page != page)) 1237 __free_pages(xdp_page, 0); 1238 1239 if (xdp_buff_has_frags(&xdp)) { 1240 shinfo = xdp_get_shared_info_from_buff(&xdp); 1241 for (i = 0; i < shinfo->nr_frags; i++) { 1242 xdp_page = skb_frag_page(&shinfo->frags[i]); 1243 put_page(xdp_page); 1244 } 1245 } 1246 1247 goto err_xdp; 1248 } 1249 rcu_read_unlock(); 1250 1251 skip_xdp: 1252 head_skb = page_to_skb(vi, rq, page, offset, len, truesize); 1253 curr_skb = head_skb; 1254 1255 if (unlikely(!curr_skb)) 1256 goto err_skb; 1257 while (--num_buf) { 1258 int num_skb_frags; 1259 1260 buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx); 1261 if (unlikely(!buf)) { 1262 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1263 dev->name, num_buf, 1264 virtio16_to_cpu(vi->vdev, 1265 hdr->num_buffers)); 1266 dev->stats.rx_length_errors++; 1267 goto err_buf; 1268 } 1269 1270 stats->bytes += len; 1271 page = virt_to_head_page(buf); 1272 1273 truesize = mergeable_ctx_to_truesize(ctx); 1274 headroom = mergeable_ctx_to_headroom(ctx); 1275 tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 1276 room = SKB_DATA_ALIGN(headroom + tailroom); 1277 if (unlikely(len > truesize - room)) { 1278 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 1279 dev->name, len, (unsigned long)(truesize - room)); 1280 dev->stats.rx_length_errors++; 1281 goto err_skb; 1282 } 1283 1284 num_skb_frags = skb_shinfo(curr_skb)->nr_frags; 1285 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { 1286 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); 1287 1288 if (unlikely(!nskb)) 1289 goto err_skb; 1290 if (curr_skb == head_skb) 1291 skb_shinfo(curr_skb)->frag_list = nskb; 1292 else 1293 curr_skb->next = nskb; 1294 curr_skb = nskb; 1295 head_skb->truesize += nskb->truesize; 1296 num_skb_frags = 0; 1297 } 1298 if (curr_skb != head_skb) { 1299 head_skb->data_len += len; 1300 head_skb->len += len; 1301 head_skb->truesize += truesize; 1302 } 1303 offset = buf - page_address(page); 1304 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { 1305 put_page(page); 1306 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, 1307 len, truesize); 1308 } else { 1309 skb_add_rx_frag(curr_skb, num_skb_frags, page, 1310 offset, len, truesize); 1311 } 1312 } 1313 1314 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); 1315 return head_skb; 1316 1317 err_xdp: 1318 rcu_read_unlock(); 1319 stats->xdp_drops++; 1320 err_skb: 1321 put_page(page); 1322 while (num_buf-- > 1) { 1323 buf = virtqueue_get_buf(rq->vq, &len); 1324 if (unlikely(!buf)) { 1325 pr_debug("%s: rx error: %d buffers missing\n", 1326 dev->name, num_buf); 1327 dev->stats.rx_length_errors++; 1328 break; 1329 } 1330 stats->bytes += len; 1331 page = virt_to_head_page(buf); 1332 put_page(page); 1333 } 1334 err_buf: 1335 stats->drops++; 1336 dev_kfree_skb(head_skb); 1337 xdp_xmit: 1338 return NULL; 1339 } 1340 1341 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, 1342 struct sk_buff *skb) 1343 { 1344 enum pkt_hash_types rss_hash_type; 1345 1346 if (!hdr_hash || !skb) 1347 return; 1348 1349 switch (__le16_to_cpu(hdr_hash->hash_report)) { 1350 case VIRTIO_NET_HASH_REPORT_TCPv4: 1351 case VIRTIO_NET_HASH_REPORT_UDPv4: 1352 case VIRTIO_NET_HASH_REPORT_TCPv6: 1353 case VIRTIO_NET_HASH_REPORT_UDPv6: 1354 case VIRTIO_NET_HASH_REPORT_TCPv6_EX: 1355 case VIRTIO_NET_HASH_REPORT_UDPv6_EX: 1356 rss_hash_type = PKT_HASH_TYPE_L4; 1357 break; 1358 case VIRTIO_NET_HASH_REPORT_IPv4: 1359 case VIRTIO_NET_HASH_REPORT_IPv6: 1360 case VIRTIO_NET_HASH_REPORT_IPv6_EX: 1361 rss_hash_type = PKT_HASH_TYPE_L3; 1362 break; 1363 case VIRTIO_NET_HASH_REPORT_NONE: 1364 default: 1365 rss_hash_type = PKT_HASH_TYPE_NONE; 1366 } 1367 skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); 1368 } 1369 1370 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 1371 void *buf, unsigned int len, void **ctx, 1372 unsigned int *xdp_xmit, 1373 struct virtnet_rq_stats *stats) 1374 { 1375 struct net_device *dev = vi->dev; 1376 struct sk_buff *skb; 1377 struct virtio_net_hdr_mrg_rxbuf *hdr; 1378 1379 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 1380 pr_debug("%s: short packet %i\n", dev->name, len); 1381 dev->stats.rx_length_errors++; 1382 virtnet_rq_free_unused_buf(rq->vq, buf); 1383 return; 1384 } 1385 1386 if (vi->mergeable_rx_bufs) 1387 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 1388 stats); 1389 else if (vi->big_packets) 1390 skb = receive_big(dev, vi, rq, buf, len, stats); 1391 else 1392 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 1393 1394 if 
(unlikely(!skb)) 1395 return; 1396 1397 hdr = skb_vnet_hdr(skb); 1398 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) 1399 virtio_skb_set_hash((const struct virtio_net_hdr_v1_hash *)hdr, skb); 1400 1401 if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) 1402 skb->ip_summed = CHECKSUM_UNNECESSARY; 1403 1404 if (virtio_net_hdr_to_skb(skb, &hdr->hdr, 1405 virtio_is_little_endian(vi->vdev))) { 1406 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n", 1407 dev->name, hdr->hdr.gso_type, 1408 hdr->hdr.gso_size); 1409 goto frame_err; 1410 } 1411 1412 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 1413 skb->protocol = eth_type_trans(skb, dev); 1414 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 1415 ntohs(skb->protocol), skb->len, skb->pkt_type); 1416 1417 napi_gro_receive(&rq->napi, skb); 1418 return; 1419 1420 frame_err: 1421 dev->stats.rx_frame_errors++; 1422 dev_kfree_skb(skb); 1423 } 1424 1425 /* Unlike mergeable buffers, all buffers are allocated to the 1426 * same size, except for the headroom. For this reason we do 1427 * not need to use mergeable_len_to_ctx here - it is enough 1428 * to store the headroom as the context ignoring the truesize. 1429 */ 1430 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, 1431 gfp_t gfp) 1432 { 1433 struct page_frag *alloc_frag = &rq->alloc_frag; 1434 char *buf; 1435 unsigned int xdp_headroom = virtnet_get_headroom(vi); 1436 void *ctx = (void *)(unsigned long)xdp_headroom; 1437 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; 1438 int err; 1439 1440 len = SKB_DATA_ALIGN(len) + 1441 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1442 if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp))) 1443 return -ENOMEM; 1444 1445 buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; 1446 get_page(alloc_frag->page); 1447 alloc_frag->offset += len; 1448 sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom, 1449 vi->hdr_len + GOOD_PACKET_LEN); 1450 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 1451 if (err < 0) 1452 put_page(virt_to_head_page(buf)); 1453 return err; 1454 } 1455 1456 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, 1457 gfp_t gfp) 1458 { 1459 struct page *first, *list = NULL; 1460 char *p; 1461 int i, err, offset; 1462 1463 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); 1464 1465 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ 1466 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { 1467 first = get_a_page(rq, gfp); 1468 if (!first) { 1469 if (list) 1470 give_pages(rq, list); 1471 return -ENOMEM; 1472 } 1473 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); 1474 1475 /* chain new page in list head to match sg */ 1476 first->private = (unsigned long)list; 1477 list = first; 1478 } 1479 1480 first = get_a_page(rq, gfp); 1481 if (!first) { 1482 give_pages(rq, list); 1483 return -ENOMEM; 1484 } 1485 p = page_address(first); 1486 1487 /* rq->sg[0], rq->sg[1] share the same page */ 1488 /* a separated rq->sg[0] for header - required in case !any_header_sg */ 1489 sg_set_buf(&rq->sg[0], p, vi->hdr_len); 1490 1491 /* rq->sg[1] for data packet, from offset */ 1492 offset = sizeof(struct padded_vnet_hdr); 1493 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); 1494 1495 /* chain first in list head */ 1496 first->private = (unsigned long)list; 1497 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, 1498 first, gfp); 1499 if (err < 0) 1500 give_pages(rq, 
first); 1501 1502 return err; 1503 } 1504 1505 static unsigned int get_mergeable_buf_len(struct receive_queue *rq, 1506 struct ewma_pkt_len *avg_pkt_len, 1507 unsigned int room) 1508 { 1509 struct virtnet_info *vi = rq->vq->vdev->priv; 1510 const size_t hdr_len = vi->hdr_len; 1511 unsigned int len; 1512 1513 if (room) 1514 return PAGE_SIZE - room; 1515 1516 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), 1517 rq->min_buf_len, PAGE_SIZE - hdr_len); 1518 1519 return ALIGN(len, L1_CACHE_BYTES); 1520 } 1521 1522 static int add_recvbuf_mergeable(struct virtnet_info *vi, 1523 struct receive_queue *rq, gfp_t gfp) 1524 { 1525 struct page_frag *alloc_frag = &rq->alloc_frag; 1526 unsigned int headroom = virtnet_get_headroom(vi); 1527 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 1528 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 1529 char *buf; 1530 void *ctx; 1531 int err; 1532 unsigned int len, hole; 1533 1534 /* Extra tailroom is needed to satisfy XDP's assumption. This 1535 * means rx frags coalescing won't work, but consider we've 1536 * disabled GSO for XDP, it won't be a big issue. 1537 */ 1538 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); 1539 if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) 1540 return -ENOMEM; 1541 1542 buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; 1543 buf += headroom; /* advance address leaving hole at front of pkt */ 1544 get_page(alloc_frag->page); 1545 alloc_frag->offset += len + room; 1546 hole = alloc_frag->size - alloc_frag->offset; 1547 if (hole < len + room) { 1548 /* To avoid internal fragmentation, if there is very likely not 1549 * enough space for another buffer, add the remaining space to 1550 * the current buffer. 1551 * XDP core assumes that frame_size of xdp_buff and the length 1552 * of the frag are PAGE_SIZE, so we disable the hole mechanism. 1553 */ 1554 if (!headroom) 1555 len += hole; 1556 alloc_frag->offset += hole; 1557 } 1558 1559 sg_init_one(rq->sg, buf, len); 1560 ctx = mergeable_len_to_ctx(len + room, headroom); 1561 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 1562 if (err < 0) 1563 put_page(virt_to_head_page(buf)); 1564 1565 return err; 1566 } 1567 1568 /* 1569 * Returns false if we couldn't fill entirely (OOM). 1570 * 1571 * Normally run in the receive path, but can also be run from ndo_open 1572 * before we're receiving packets, or from refill_work which is 1573 * careful to disable receiving (using napi_disable). 
1574 */ 1575 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, 1576 gfp_t gfp) 1577 { 1578 int err; 1579 bool oom; 1580 1581 do { 1582 if (vi->mergeable_rx_bufs) 1583 err = add_recvbuf_mergeable(vi, rq, gfp); 1584 else if (vi->big_packets) 1585 err = add_recvbuf_big(vi, rq, gfp); 1586 else 1587 err = add_recvbuf_small(vi, rq, gfp); 1588 1589 oom = err == -ENOMEM; 1590 if (err) 1591 break; 1592 } while (rq->vq->num_free); 1593 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { 1594 unsigned long flags; 1595 1596 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); 1597 rq->stats.kicks++; 1598 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); 1599 } 1600 1601 return !oom; 1602 } 1603 1604 static void skb_recv_done(struct virtqueue *rvq) 1605 { 1606 struct virtnet_info *vi = rvq->vdev->priv; 1607 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; 1608 1609 virtqueue_napi_schedule(&rq->napi, rvq); 1610 } 1611 1612 static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) 1613 { 1614 napi_enable(napi); 1615 1616 /* If all buffers were filled by other side before we napi_enabled, we 1617 * won't get another interrupt, so process any outstanding packets now. 1618 * Call local_bh_enable after to trigger softIRQ processing. 1619 */ 1620 local_bh_disable(); 1621 virtqueue_napi_schedule(napi, vq); 1622 local_bh_enable(); 1623 } 1624 1625 static void virtnet_napi_tx_enable(struct virtnet_info *vi, 1626 struct virtqueue *vq, 1627 struct napi_struct *napi) 1628 { 1629 if (!napi->weight) 1630 return; 1631 1632 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only 1633 * enable the feature if this is likely affine with the transmit path. 1634 */ 1635 if (!vi->affinity_hint_set) { 1636 napi->weight = 0; 1637 return; 1638 } 1639 1640 return virtnet_napi_enable(vq, napi); 1641 } 1642 1643 static void virtnet_napi_tx_disable(struct napi_struct *napi) 1644 { 1645 if (napi->weight) 1646 napi_disable(napi); 1647 } 1648 1649 static void refill_work(struct work_struct *work) 1650 { 1651 struct virtnet_info *vi = 1652 container_of(work, struct virtnet_info, refill.work); 1653 bool still_empty; 1654 int i; 1655 1656 for (i = 0; i < vi->curr_queue_pairs; i++) { 1657 struct receive_queue *rq = &vi->rq[i]; 1658 1659 napi_disable(&rq->napi); 1660 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); 1661 virtnet_napi_enable(rq->vq, &rq->napi); 1662 1663 /* In theory, this can happen: if we don't get any buffers in 1664 * we will *never* try to fill again. 
1665 */ 1666 if (still_empty) 1667 schedule_delayed_work(&vi->refill, HZ/2); 1668 } 1669 } 1670 1671 static int virtnet_receive(struct receive_queue *rq, int budget, 1672 unsigned int *xdp_xmit) 1673 { 1674 struct virtnet_info *vi = rq->vq->vdev->priv; 1675 struct virtnet_rq_stats stats = {}; 1676 unsigned int len; 1677 void *buf; 1678 int i; 1679 1680 if (!vi->big_packets || vi->mergeable_rx_bufs) { 1681 void *ctx; 1682 1683 while (stats.packets < budget && 1684 (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) { 1685 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats); 1686 stats.packets++; 1687 } 1688 } else { 1689 while (stats.packets < budget && 1690 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { 1691 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats); 1692 stats.packets++; 1693 } 1694 } 1695 1696 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 1697 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { 1698 spin_lock(&vi->refill_lock); 1699 if (vi->refill_enabled) 1700 schedule_delayed_work(&vi->refill, 0); 1701 spin_unlock(&vi->refill_lock); 1702 } 1703 } 1704 1705 u64_stats_update_begin(&rq->stats.syncp); 1706 for (i = 0; i < VIRTNET_RQ_STATS_LEN; i++) { 1707 size_t offset = virtnet_rq_stats_desc[i].offset; 1708 u64 *item; 1709 1710 item = (u64 *)((u8 *)&rq->stats + offset); 1711 *item += *(u64 *)((u8 *)&stats + offset); 1712 } 1713 u64_stats_update_end(&rq->stats.syncp); 1714 1715 return stats.packets; 1716 } 1717 1718 static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) 1719 { 1720 unsigned int len; 1721 unsigned int packets = 0; 1722 unsigned int bytes = 0; 1723 void *ptr; 1724 1725 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { 1726 if (likely(!is_xdp_frame(ptr))) { 1727 struct sk_buff *skb = ptr; 1728 1729 pr_debug("Sent skb %p\n", skb); 1730 1731 bytes += skb->len; 1732 napi_consume_skb(skb, in_napi); 1733 } else { 1734 struct xdp_frame *frame = ptr_to_xdp(ptr); 1735 1736 bytes += xdp_get_frame_len(frame); 1737 xdp_return_frame(frame); 1738 } 1739 packets++; 1740 } 1741 1742 /* Avoid overhead when no packets have been processed 1743 * happens when called speculatively from start_xmit. 
1744 */ 1745 if (!packets) 1746 return; 1747 1748 u64_stats_update_begin(&sq->stats.syncp); 1749 sq->stats.bytes += bytes; 1750 sq->stats.packets += packets; 1751 u64_stats_update_end(&sq->stats.syncp); 1752 } 1753 1754 static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) 1755 { 1756 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) 1757 return false; 1758 else if (q < vi->curr_queue_pairs) 1759 return true; 1760 else 1761 return false; 1762 } 1763 1764 static void virtnet_poll_cleantx(struct receive_queue *rq) 1765 { 1766 struct virtnet_info *vi = rq->vq->vdev->priv; 1767 unsigned int index = vq2rxq(rq->vq); 1768 struct send_queue *sq = &vi->sq[index]; 1769 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 1770 1771 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 1772 return; 1773 1774 if (__netif_tx_trylock(txq)) { 1775 if (sq->reset) { 1776 __netif_tx_unlock(txq); 1777 return; 1778 } 1779 1780 do { 1781 virtqueue_disable_cb(sq->vq); 1782 free_old_xmit_skbs(sq, true); 1783 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 1784 1785 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) 1786 netif_tx_wake_queue(txq); 1787 1788 __netif_tx_unlock(txq); 1789 } 1790 } 1791 1792 static int virtnet_poll(struct napi_struct *napi, int budget) 1793 { 1794 struct receive_queue *rq = 1795 container_of(napi, struct receive_queue, napi); 1796 struct virtnet_info *vi = rq->vq->vdev->priv; 1797 struct send_queue *sq; 1798 unsigned int received; 1799 unsigned int xdp_xmit = 0; 1800 1801 virtnet_poll_cleantx(rq); 1802 1803 received = virtnet_receive(rq, budget, &xdp_xmit); 1804 1805 if (xdp_xmit & VIRTIO_XDP_REDIR) 1806 xdp_do_flush(); 1807 1808 /* Out of packets? */ 1809 if (received < budget) 1810 virtqueue_napi_complete(napi, rq->vq, received); 1811 1812 if (xdp_xmit & VIRTIO_XDP_TX) { 1813 sq = virtnet_xdp_get_sq(vi); 1814 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 1815 u64_stats_update_begin(&sq->stats.syncp); 1816 sq->stats.kicks++; 1817 u64_stats_update_end(&sq->stats.syncp); 1818 } 1819 virtnet_xdp_put_sq(vi, sq); 1820 } 1821 1822 return received; 1823 } 1824 1825 static int virtnet_open(struct net_device *dev) 1826 { 1827 struct virtnet_info *vi = netdev_priv(dev); 1828 int i, err; 1829 1830 enable_delayed_refill(vi); 1831 1832 for (i = 0; i < vi->max_queue_pairs; i++) { 1833 if (i < vi->curr_queue_pairs) 1834 /* Make sure we have some buffers: if oom use wq. 
*/ 1835 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 1836 schedule_delayed_work(&vi->refill, 0); 1837 1838 err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i, vi->rq[i].napi.napi_id); 1839 if (err < 0) 1840 return err; 1841 1842 err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq, 1843 MEM_TYPE_PAGE_SHARED, NULL); 1844 if (err < 0) { 1845 xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq); 1846 return err; 1847 } 1848 1849 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 1850 virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi); 1851 } 1852 1853 return 0; 1854 } 1855 1856 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 1857 { 1858 struct send_queue *sq = container_of(napi, struct send_queue, napi); 1859 struct virtnet_info *vi = sq->vq->vdev->priv; 1860 unsigned int index = vq2txq(sq->vq); 1861 struct netdev_queue *txq; 1862 int opaque; 1863 bool done; 1864 1865 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 1866 /* We don't need to enable cb for XDP */ 1867 napi_complete_done(napi, 0); 1868 return 0; 1869 } 1870 1871 txq = netdev_get_tx_queue(vi->dev, index); 1872 __netif_tx_lock(txq, raw_smp_processor_id()); 1873 virtqueue_disable_cb(sq->vq); 1874 free_old_xmit_skbs(sq, true); 1875 1876 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) 1877 netif_tx_wake_queue(txq); 1878 1879 opaque = virtqueue_enable_cb_prepare(sq->vq); 1880 1881 done = napi_complete_done(napi, 0); 1882 1883 if (!done) 1884 virtqueue_disable_cb(sq->vq); 1885 1886 __netif_tx_unlock(txq); 1887 1888 if (done) { 1889 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 1890 if (napi_schedule_prep(napi)) { 1891 __netif_tx_lock(txq, raw_smp_processor_id()); 1892 virtqueue_disable_cb(sq->vq); 1893 __netif_tx_unlock(txq); 1894 __napi_schedule(napi); 1895 } 1896 } 1897 } 1898 1899 return 0; 1900 } 1901 1902 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) 1903 { 1904 struct virtio_net_hdr_mrg_rxbuf *hdr; 1905 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 1906 struct virtnet_info *vi = sq->vq->vdev->priv; 1907 int num_sg; 1908 unsigned hdr_len = vi->hdr_len; 1909 bool can_push; 1910 1911 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 1912 1913 can_push = vi->any_header_sg && 1914 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 1915 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 1916 /* Even if we can, don't push here yet as this would skew 1917 * csum_start offset below. */ 1918 if (can_push) 1919 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); 1920 else 1921 hdr = skb_vnet_hdr(skb); 1922 1923 if (virtio_net_hdr_from_skb(skb, &hdr->hdr, 1924 virtio_is_little_endian(vi->vdev), false, 1925 0)) 1926 return -EPROTO; 1927 1928 if (vi->mergeable_rx_bufs) 1929 hdr->num_buffers = 0; 1930 1931 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 1932 if (can_push) { 1933 __skb_push(skb, hdr_len); 1934 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 1935 if (unlikely(num_sg < 0)) 1936 return num_sg; 1937 /* Pull header back to avoid skew in tx bytes calculations. 
*/ 1938 __skb_pull(skb, hdr_len); 1939 } else { 1940 sg_set_buf(sq->sg, hdr, hdr_len); 1941 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); 1942 if (unlikely(num_sg < 0)) 1943 return num_sg; 1944 num_sg++; 1945 } 1946 return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC); 1947 } 1948 1949 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 1950 { 1951 struct virtnet_info *vi = netdev_priv(dev); 1952 int qnum = skb_get_queue_mapping(skb); 1953 struct send_queue *sq = &vi->sq[qnum]; 1954 int err; 1955 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1956 bool kick = !netdev_xmit_more(); 1957 bool use_napi = sq->napi.weight; 1958 1959 /* Free up any pending old buffers before queueing new ones. */ 1960 do { 1961 if (use_napi) 1962 virtqueue_disable_cb(sq->vq); 1963 1964 free_old_xmit_skbs(sq, false); 1965 1966 } while (use_napi && kick && 1967 unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 1968 1969 /* timestamp packet in software */ 1970 skb_tx_timestamp(skb); 1971 1972 /* Try to transmit */ 1973 err = xmit_skb(sq, skb); 1974 1975 /* This should not happen! */ 1976 if (unlikely(err)) { 1977 dev->stats.tx_fifo_errors++; 1978 if (net_ratelimit()) 1979 dev_warn(&dev->dev, 1980 "Unexpected TXQ (%d) queue failure: %d\n", 1981 qnum, err); 1982 dev->stats.tx_dropped++; 1983 dev_kfree_skb_any(skb); 1984 return NETDEV_TX_OK; 1985 } 1986 1987 /* Don't wait up for transmitted skbs to be freed. */ 1988 if (!use_napi) { 1989 skb_orphan(skb); 1990 nf_reset_ct(skb); 1991 } 1992 1993 /* If running out of space, stop queue to avoid getting packets that we 1994 * are then unable to transmit. 1995 * An alternative would be to force queuing layer to requeue the skb by 1996 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be 1997 * returned in a normal path of operation: it means that driver is not 1998 * maintaining the TX queue stop/start state properly, and causes 1999 * the stack to do a non-trivial amount of useless work. 2000 * Since most packets only take 1 or 2 ring slots, stopping the queue 2001 * early means 16 slots are typically wasted. 2002 */ 2003 if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { 2004 netif_stop_subqueue(dev, qnum); 2005 if (use_napi) { 2006 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 2007 virtqueue_napi_schedule(&sq->napi, sq->vq); 2008 } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { 2009 /* More just got used, free them then recheck. 
*/ 2010 free_old_xmit_skbs(sq, false); 2011 if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { 2012 netif_start_subqueue(dev, qnum); 2013 virtqueue_disable_cb(sq->vq); 2014 } 2015 } 2016 } 2017 2018 if (kick || netif_xmit_stopped(txq)) { 2019 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 2020 u64_stats_update_begin(&sq->stats.syncp); 2021 sq->stats.kicks++; 2022 u64_stats_update_end(&sq->stats.syncp); 2023 } 2024 } 2025 2026 return NETDEV_TX_OK; 2027 } 2028 2029 static int virtnet_rx_resize(struct virtnet_info *vi, 2030 struct receive_queue *rq, u32 ring_num) 2031 { 2032 bool running = netif_running(vi->dev); 2033 int err, qindex; 2034 2035 qindex = rq - vi->rq; 2036 2037 if (running) 2038 napi_disable(&rq->napi); 2039 2040 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf); 2041 if (err) 2042 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); 2043 2044 if (!try_fill_recv(vi, rq, GFP_KERNEL)) 2045 schedule_delayed_work(&vi->refill, 0); 2046 2047 if (running) 2048 virtnet_napi_enable(rq->vq, &rq->napi); 2049 return err; 2050 } 2051 2052 static int virtnet_tx_resize(struct virtnet_info *vi, 2053 struct send_queue *sq, u32 ring_num) 2054 { 2055 bool running = netif_running(vi->dev); 2056 struct netdev_queue *txq; 2057 int err, qindex; 2058 2059 qindex = sq - vi->sq; 2060 2061 if (running) 2062 virtnet_napi_tx_disable(&sq->napi); 2063 2064 txq = netdev_get_tx_queue(vi->dev, qindex); 2065 2066 /* 1. wait for all xmit to complete 2067 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue() 2068 */ 2069 __netif_tx_lock_bh(txq); 2070 2071 /* Prevent rx poll from accessing sq. */ 2072 sq->reset = true; 2073 2074 /* Prevent the upper layer from trying to send packets. */ 2075 netif_stop_subqueue(vi->dev, qindex); 2076 2077 __netif_tx_unlock_bh(txq); 2078 2079 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf); 2080 if (err) 2081 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 2082 2083 __netif_tx_lock_bh(txq); 2084 sq->reset = false; 2085 netif_tx_wake_queue(txq); 2086 __netif_tx_unlock_bh(txq); 2087 2088 if (running) 2089 virtnet_napi_tx_enable(vi, sq->vq, &sq->napi); 2090 return err; 2091 } 2092 2093 /* 2094 * Send command via the control virtqueue and check status. Commands 2095 * supported by the hypervisor, as indicated by feature bits, should 2096 * never fail unless improperly formatted. 2097 */ 2098 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 2099 struct scatterlist *out) 2100 { 2101 struct scatterlist *sgs[4], hdr, stat; 2102 unsigned out_num = 0, tmp; 2103 int ret; 2104 2105 /* Caller should know better */ 2106 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 2107 2108 vi->ctrl->status = ~0; 2109 vi->ctrl->hdr.class = class; 2110 vi->ctrl->hdr.cmd = cmd; 2111 /* Add header */ 2112 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 2113 sgs[out_num++] = &hdr; 2114 2115 if (out) 2116 sgs[out_num++] = out; 2117 2118 /* Add return status.
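* (The status entry is the single device-writable element handed to virtqueue_add_sgs() below; after kicking the control vq the driver busy-waits for the device to fill in ctrl->status.)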
*/ 2119 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 2120 sgs[out_num] = &stat; 2121 2122 BUG_ON(out_num + 1 > ARRAY_SIZE(sgs)); 2123 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC); 2124 if (ret < 0) { 2125 dev_warn(&vi->vdev->dev, 2126 "Failed to add sgs for command vq: %d\n.", ret); 2127 return false; 2128 } 2129 2130 if (unlikely(!virtqueue_kick(vi->cvq))) 2131 return vi->ctrl->status == VIRTIO_NET_OK; 2132 2133 /* Spin for a response, the kick causes an ioport write, trapping 2134 * into the hypervisor, so the request should be handled immediately. 2135 */ 2136 while (!virtqueue_get_buf(vi->cvq, &tmp) && 2137 !virtqueue_is_broken(vi->cvq)) 2138 cpu_relax(); 2139 2140 return vi->ctrl->status == VIRTIO_NET_OK; 2141 } 2142 2143 static int virtnet_set_mac_address(struct net_device *dev, void *p) 2144 { 2145 struct virtnet_info *vi = netdev_priv(dev); 2146 struct virtio_device *vdev = vi->vdev; 2147 int ret; 2148 struct sockaddr *addr; 2149 struct scatterlist sg; 2150 2151 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 2152 return -EOPNOTSUPP; 2153 2154 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 2155 if (!addr) 2156 return -ENOMEM; 2157 2158 ret = eth_prepare_mac_addr_change(dev, addr); 2159 if (ret) 2160 goto out; 2161 2162 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 2163 sg_init_one(&sg, addr->sa_data, dev->addr_len); 2164 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 2165 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 2166 dev_warn(&vdev->dev, 2167 "Failed to set mac address by vq command.\n"); 2168 ret = -EINVAL; 2169 goto out; 2170 } 2171 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 2172 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 2173 unsigned int i; 2174 2175 /* Naturally, this has an atomicity problem. 
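* (The address is written one byte at a time through config space, so a concurrent reader could observe a mix of old and new bytes; legacy devices without VIRTIO_NET_F_CTRL_MAC_ADDR offer no better option.)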
*/ 2176 for (i = 0; i < dev->addr_len; i++) 2177 virtio_cwrite8(vdev, 2178 offsetof(struct virtio_net_config, mac) + 2179 i, addr->sa_data[i]); 2180 } 2181 2182 eth_commit_mac_addr_change(dev, p); 2183 ret = 0; 2184 2185 out: 2186 kfree(addr); 2187 return ret; 2188 } 2189 2190 static void virtnet_stats(struct net_device *dev, 2191 struct rtnl_link_stats64 *tot) 2192 { 2193 struct virtnet_info *vi = netdev_priv(dev); 2194 unsigned int start; 2195 int i; 2196 2197 for (i = 0; i < vi->max_queue_pairs; i++) { 2198 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 2199 struct receive_queue *rq = &vi->rq[i]; 2200 struct send_queue *sq = &vi->sq[i]; 2201 2202 do { 2203 start = u64_stats_fetch_begin(&sq->stats.syncp); 2204 tpackets = sq->stats.packets; 2205 tbytes = sq->stats.bytes; 2206 terrors = sq->stats.tx_timeouts; 2207 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 2208 2209 do { 2210 start = u64_stats_fetch_begin(&rq->stats.syncp); 2211 rpackets = rq->stats.packets; 2212 rbytes = rq->stats.bytes; 2213 rdrops = rq->stats.drops; 2214 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 2215 2216 tot->rx_packets += rpackets; 2217 tot->tx_packets += tpackets; 2218 tot->rx_bytes += rbytes; 2219 tot->tx_bytes += tbytes; 2220 tot->rx_dropped += rdrops; 2221 tot->tx_errors += terrors; 2222 } 2223 2224 tot->tx_dropped = dev->stats.tx_dropped; 2225 tot->tx_fifo_errors = dev->stats.tx_fifo_errors; 2226 tot->rx_length_errors = dev->stats.rx_length_errors; 2227 tot->rx_frame_errors = dev->stats.rx_frame_errors; 2228 } 2229 2230 static void virtnet_ack_link_announce(struct virtnet_info *vi) 2231 { 2232 rtnl_lock(); 2233 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 2234 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 2235 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 2236 rtnl_unlock(); 2237 } 2238 2239 static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 2240 { 2241 struct scatterlist sg; 2242 struct net_device *dev = vi->dev; 2243 2244 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) 2245 return 0; 2246 2247 vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); 2248 sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq)); 2249 2250 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 2251 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { 2252 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", 2253 queue_pairs); 2254 return -EINVAL; 2255 } else { 2256 vi->curr_queue_pairs = queue_pairs; 2257 /* virtnet_open() will refill when device is going to up. */ 2258 if (dev->flags & IFF_UP) 2259 schedule_delayed_work(&vi->refill, 0); 2260 } 2261 2262 return 0; 2263 } 2264 2265 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 2266 { 2267 int err; 2268 2269 rtnl_lock(); 2270 err = _virtnet_set_queues(vi, queue_pairs); 2271 rtnl_unlock(); 2272 return err; 2273 } 2274 2275 static int virtnet_close(struct net_device *dev) 2276 { 2277 struct virtnet_info *vi = netdev_priv(dev); 2278 int i; 2279 2280 /* Make sure NAPI doesn't schedule refill work */ 2281 disable_delayed_refill(vi); 2282 /* Make sure refill_work doesn't re-enable napi! 
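* (refill_work briefly re-enables the RX napi instances it services, so it must be cancelled synchronously after disable_delayed_refill() and before the loop below disables napi for good.)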
*/ 2283 cancel_delayed_work_sync(&vi->refill); 2284 2285 for (i = 0; i < vi->max_queue_pairs; i++) { 2286 virtnet_napi_tx_disable(&vi->sq[i].napi); 2287 napi_disable(&vi->rq[i].napi); 2288 xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq); 2289 } 2290 2291 return 0; 2292 } 2293 2294 static void virtnet_set_rx_mode(struct net_device *dev) 2295 { 2296 struct virtnet_info *vi = netdev_priv(dev); 2297 struct scatterlist sg[2]; 2298 struct virtio_net_ctrl_mac *mac_data; 2299 struct netdev_hw_addr *ha; 2300 int uc_count; 2301 int mc_count; 2302 void *buf; 2303 int i; 2304 2305 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 2306 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 2307 return; 2308 2309 vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0); 2310 vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0); 2311 2312 sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc)); 2313 2314 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 2315 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 2316 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 2317 vi->ctrl->promisc ? "en" : "dis"); 2318 2319 sg_init_one(sg, &vi->ctrl->allmulti, sizeof(vi->ctrl->allmulti)); 2320 2321 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 2322 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 2323 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 2324 vi->ctrl->allmulti ? "en" : "dis"); 2325 2326 uc_count = netdev_uc_count(dev); 2327 mc_count = netdev_mc_count(dev); 2328 /* MAC filter - use one buffer for both lists */ 2329 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 2330 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 2331 mac_data = buf; 2332 if (!buf) 2333 return; 2334 2335 sg_init_table(sg, 2); 2336 2337 /* Store the unicast list and count in the front of the buffer */ 2338 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 2339 i = 0; 2340 netdev_for_each_uc_addr(ha, dev) 2341 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 2342 2343 sg_set_buf(&sg[0], mac_data, 2344 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 2345 2346 /* multicast list and count fill the end */ 2347 mac_data = (void *)&mac_data->macs[uc_count][0]; 2348 2349 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 2350 i = 0; 2351 netdev_for_each_mc_addr(ha, dev) 2352 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 2353 2354 sg_set_buf(&sg[1], mac_data, 2355 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 2356 2357 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 2358 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 2359 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 2360 2361 kfree(buf); 2362 } 2363 2364 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 2365 __be16 proto, u16 vid) 2366 { 2367 struct virtnet_info *vi = netdev_priv(dev); 2368 struct scatterlist sg; 2369 2370 vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid); 2371 sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); 2372 2373 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 2374 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 2375 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 2376 return 0; 2377 } 2378 2379 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 2380 __be16 proto, u16 vid) 2381 { 2382 struct virtnet_info *vi = netdev_priv(dev); 2383 struct scatterlist sg; 2384 2385 vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid); 2386 sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); 2387 2388 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 2389 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 2390 dev_warn(&dev->dev, "Failed to kill 
VLAN ID %d.\n", vid); 2391 return 0; 2392 } 2393 2394 static void virtnet_clean_affinity(struct virtnet_info *vi) 2395 { 2396 int i; 2397 2398 if (vi->affinity_hint_set) { 2399 for (i = 0; i < vi->max_queue_pairs; i++) { 2400 virtqueue_set_affinity(vi->rq[i].vq, NULL); 2401 virtqueue_set_affinity(vi->sq[i].vq, NULL); 2402 } 2403 2404 vi->affinity_hint_set = false; 2405 } 2406 } 2407 2408 static void virtnet_set_affinity(struct virtnet_info *vi) 2409 { 2410 cpumask_var_t mask; 2411 int stragglers; 2412 int group_size; 2413 int i, j, cpu; 2414 int num_cpu; 2415 int stride; 2416 2417 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 2418 virtnet_clean_affinity(vi); 2419 return; 2420 } 2421 2422 num_cpu = num_online_cpus(); 2423 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); 2424 stragglers = num_cpu >= vi->curr_queue_pairs ? 2425 num_cpu % vi->curr_queue_pairs : 2426 0; 2427 cpu = cpumask_first(cpu_online_mask); 2428 2429 for (i = 0; i < vi->curr_queue_pairs; i++) { 2430 group_size = stride + (i < stragglers ? 1 : 0); 2431 2432 for (j = 0; j < group_size; j++) { 2433 cpumask_set_cpu(cpu, mask); 2434 cpu = cpumask_next_wrap(cpu, cpu_online_mask, 2435 nr_cpu_ids, false); 2436 } 2437 virtqueue_set_affinity(vi->rq[i].vq, mask); 2438 virtqueue_set_affinity(vi->sq[i].vq, mask); 2439 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 2440 cpumask_clear(mask); 2441 } 2442 2443 vi->affinity_hint_set = true; 2444 free_cpumask_var(mask); 2445 } 2446 2447 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 2448 { 2449 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 2450 node); 2451 virtnet_set_affinity(vi); 2452 return 0; 2453 } 2454 2455 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 2456 { 2457 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 2458 node_dead); 2459 virtnet_set_affinity(vi); 2460 return 0; 2461 } 2462 2463 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 2464 { 2465 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 2466 node); 2467 2468 virtnet_clean_affinity(vi); 2469 return 0; 2470 } 2471 2472 static enum cpuhp_state virtionet_online; 2473 2474 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 2475 { 2476 int ret; 2477 2478 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 2479 if (ret) 2480 return ret; 2481 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 2482 &vi->node_dead); 2483 if (!ret) 2484 return ret; 2485 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 2486 return ret; 2487 } 2488 2489 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 2490 { 2491 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 2492 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 2493 &vi->node_dead); 2494 } 2495 2496 static void virtnet_get_ringparam(struct net_device *dev, 2497 struct ethtool_ringparam *ring, 2498 struct kernel_ethtool_ringparam *kernel_ring, 2499 struct netlink_ext_ack *extack) 2500 { 2501 struct virtnet_info *vi = netdev_priv(dev); 2502 2503 ring->rx_max_pending = vi->rq[0].vq->num_max; 2504 ring->tx_max_pending = vi->sq[0].vq->num_max; 2505 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 2506 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 2507 } 2508 2509 static int virtnet_set_ringparam(struct net_device *dev, 2510 struct ethtool_ringparam *ring, 2511 struct kernel_ethtool_ringparam *kernel_ring, 2512 struct 
netlink_ext_ack *extack) 2513 { 2514 struct virtnet_info *vi = netdev_priv(dev); 2515 u32 rx_pending, tx_pending; 2516 struct receive_queue *rq; 2517 struct send_queue *sq; 2518 int i, err; 2519 2520 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 2521 return -EINVAL; 2522 2523 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 2524 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 2525 2526 if (ring->rx_pending == rx_pending && 2527 ring->tx_pending == tx_pending) 2528 return 0; 2529 2530 if (ring->rx_pending > vi->rq[0].vq->num_max) 2531 return -EINVAL; 2532 2533 if (ring->tx_pending > vi->sq[0].vq->num_max) 2534 return -EINVAL; 2535 2536 for (i = 0; i < vi->max_queue_pairs; i++) { 2537 rq = vi->rq + i; 2538 sq = vi->sq + i; 2539 2540 if (ring->tx_pending != tx_pending) { 2541 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 2542 if (err) 2543 return err; 2544 } 2545 2546 if (ring->rx_pending != rx_pending) { 2547 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 2548 if (err) 2549 return err; 2550 } 2551 } 2552 2553 return 0; 2554 } 2555 2556 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 2557 { 2558 struct net_device *dev = vi->dev; 2559 struct scatterlist sgs[4]; 2560 unsigned int sg_buf_size; 2561 2562 /* prepare sgs */ 2563 sg_init_table(sgs, 4); 2564 2565 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table); 2566 sg_set_buf(&sgs[0], &vi->ctrl->rss, sg_buf_size); 2567 2568 sg_buf_size = sizeof(uint16_t) * (vi->ctrl->rss.indirection_table_mask + 1); 2569 sg_set_buf(&sgs[1], vi->ctrl->rss.indirection_table, sg_buf_size); 2570 2571 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) 2572 - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); 2573 sg_set_buf(&sgs[2], &vi->ctrl->rss.max_tx_vq, sg_buf_size); 2574 2575 sg_buf_size = vi->rss_key_size; 2576 sg_set_buf(&sgs[3], vi->ctrl->rss.key, sg_buf_size); 2577 2578 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 2579 vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG 2580 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) { 2581 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 2582 return false; 2583 } 2584 return true; 2585 } 2586 2587 static void virtnet_init_default_rss(struct virtnet_info *vi) 2588 { 2589 u32 indir_val = 0; 2590 int i = 0; 2591 2592 vi->ctrl->rss.hash_types = vi->rss_hash_types_supported; 2593 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 2594 vi->ctrl->rss.indirection_table_mask = vi->rss_indir_table_size 2595 ? 
vi->rss_indir_table_size - 1 : 0; 2596 vi->ctrl->rss.unclassified_queue = 0; 2597 2598 for (; i < vi->rss_indir_table_size; ++i) { 2599 indir_val = ethtool_rxfh_indir_default(i, vi->curr_queue_pairs); 2600 vi->ctrl->rss.indirection_table[i] = indir_val; 2601 } 2602 2603 vi->ctrl->rss.max_tx_vq = vi->curr_queue_pairs; 2604 vi->ctrl->rss.hash_key_length = vi->rss_key_size; 2605 2606 netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size); 2607 } 2608 2609 static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info) 2610 { 2611 info->data = 0; 2612 switch (info->flow_type) { 2613 case TCP_V4_FLOW: 2614 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 2615 info->data = RXH_IP_SRC | RXH_IP_DST | 2616 RXH_L4_B_0_1 | RXH_L4_B_2_3; 2617 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 2618 info->data = RXH_IP_SRC | RXH_IP_DST; 2619 } 2620 break; 2621 case TCP_V6_FLOW: 2622 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 2623 info->data = RXH_IP_SRC | RXH_IP_DST | 2624 RXH_L4_B_0_1 | RXH_L4_B_2_3; 2625 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 2626 info->data = RXH_IP_SRC | RXH_IP_DST; 2627 } 2628 break; 2629 case UDP_V4_FLOW: 2630 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 2631 info->data = RXH_IP_SRC | RXH_IP_DST | 2632 RXH_L4_B_0_1 | RXH_L4_B_2_3; 2633 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 2634 info->data = RXH_IP_SRC | RXH_IP_DST; 2635 } 2636 break; 2637 case UDP_V6_FLOW: 2638 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 2639 info->data = RXH_IP_SRC | RXH_IP_DST | 2640 RXH_L4_B_0_1 | RXH_L4_B_2_3; 2641 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 2642 info->data = RXH_IP_SRC | RXH_IP_DST; 2643 } 2644 break; 2645 case IPV4_FLOW: 2646 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 2647 info->data = RXH_IP_SRC | RXH_IP_DST; 2648 2649 break; 2650 case IPV6_FLOW: 2651 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 2652 info->data = RXH_IP_SRC | RXH_IP_DST; 2653 2654 break; 2655 default: 2656 info->data = 0; 2657 break; 2658 } 2659 } 2660 2661 static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info) 2662 { 2663 u32 new_hashtypes = vi->rss_hash_types_saved; 2664 bool is_disable = info->data & RXH_DISCARD; 2665 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 2666 2667 /* supports only 'sd', 'sdfn' and 'r' */ 2668 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 2669 return false; 2670 2671 switch (info->flow_type) { 2672 case TCP_V4_FLOW: 2673 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 2674 if (!is_disable) 2675 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 2676 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 2677 break; 2678 case UDP_V4_FLOW: 2679 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 2680 if (!is_disable) 2681 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 2682 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 2683 break; 2684 case IPV4_FLOW: 2685 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 2686 if (!is_disable) 2687 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 2688 break; 2689 case TCP_V6_FLOW: 2690 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 2691 if (!is_disable) 2692 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 2693 | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 2694 break; 2695 case UDP_V6_FLOW: 2696 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 2697 if (!is_disable) 2698 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 2699 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 2700 break; 2701 case IPV6_FLOW: 2702 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 2703 if (!is_disable) 2704 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 2705 break; 2706 default: 2707 /* unsupported flow */ 2708 return false; 2709 } 2710 2711 /* if an unsupported hashtype was set */ 2712 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 2713 return false; 2714 2715 if (new_hashtypes != vi->rss_hash_types_saved) { 2716 vi->rss_hash_types_saved = new_hashtypes; 2717 vi->ctrl->rss.hash_types = vi->rss_hash_types_saved; 2718 if (vi->dev->features & NETIF_F_RXHASH) 2719 return virtnet_commit_rss_command(vi); 2720 } 2721 2722 return true; 2723 } 2724 2725 static void virtnet_get_drvinfo(struct net_device *dev, 2726 struct ethtool_drvinfo *info) 2727 { 2728 struct virtnet_info *vi = netdev_priv(dev); 2729 struct virtio_device *vdev = vi->vdev; 2730 2731 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 2732 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 2733 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 2734 2735 } 2736 2737 /* TODO: Eliminate OOO packets during switching */ 2738 static int virtnet_set_channels(struct net_device *dev, 2739 struct ethtool_channels *channels) 2740 { 2741 struct virtnet_info *vi = netdev_priv(dev); 2742 u16 queue_pairs = channels->combined_count; 2743 int err; 2744 2745 /* We don't support separate rx/tx channels. 2746 * We don't allow setting 'other' channels. 2747 */ 2748 if (channels->rx_count || channels->tx_count || channels->other_count) 2749 return -EINVAL; 2750 2751 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 2752 return -EINVAL; 2753 2754 /* For now we don't support modifying channels while XDP is loaded. 2755 * Also, when XDP is loaded all RX queues have XDP programs, so we only 2756 * need to check a single RX queue.
2757 */ 2758 if (vi->rq[0].xdp_prog) 2759 return -EINVAL; 2760 2761 cpus_read_lock(); 2762 err = _virtnet_set_queues(vi, queue_pairs); 2763 if (err) { 2764 cpus_read_unlock(); 2765 goto err; 2766 } 2767 virtnet_set_affinity(vi); 2768 cpus_read_unlock(); 2769 2770 netif_set_real_num_tx_queues(dev, queue_pairs); 2771 netif_set_real_num_rx_queues(dev, queue_pairs); 2772 err: 2773 return err; 2774 } 2775 2776 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 2777 { 2778 struct virtnet_info *vi = netdev_priv(dev); 2779 unsigned int i, j; 2780 u8 *p = data; 2781 2782 switch (stringset) { 2783 case ETH_SS_STATS: 2784 for (i = 0; i < vi->curr_queue_pairs; i++) { 2785 for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) 2786 ethtool_sprintf(&p, "rx_queue_%u_%s", i, 2787 virtnet_rq_stats_desc[j].desc); 2788 } 2789 2790 for (i = 0; i < vi->curr_queue_pairs; i++) { 2791 for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) 2792 ethtool_sprintf(&p, "tx_queue_%u_%s", i, 2793 virtnet_sq_stats_desc[j].desc); 2794 } 2795 break; 2796 } 2797 } 2798 2799 static int virtnet_get_sset_count(struct net_device *dev, int sset) 2800 { 2801 struct virtnet_info *vi = netdev_priv(dev); 2802 2803 switch (sset) { 2804 case ETH_SS_STATS: 2805 return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN + 2806 VIRTNET_SQ_STATS_LEN); 2807 default: 2808 return -EOPNOTSUPP; 2809 } 2810 } 2811 2812 static void virtnet_get_ethtool_stats(struct net_device *dev, 2813 struct ethtool_stats *stats, u64 *data) 2814 { 2815 struct virtnet_info *vi = netdev_priv(dev); 2816 unsigned int idx = 0, start, i, j; 2817 const u8 *stats_base; 2818 size_t offset; 2819 2820 for (i = 0; i < vi->curr_queue_pairs; i++) { 2821 struct receive_queue *rq = &vi->rq[i]; 2822 2823 stats_base = (u8 *)&rq->stats; 2824 do { 2825 start = u64_stats_fetch_begin(&rq->stats.syncp); 2826 for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) { 2827 offset = virtnet_rq_stats_desc[j].offset; 2828 data[idx + j] = *(u64 *)(stats_base + offset); 2829 } 2830 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 2831 idx += VIRTNET_RQ_STATS_LEN; 2832 } 2833 2834 for (i = 0; i < vi->curr_queue_pairs; i++) { 2835 struct send_queue *sq = &vi->sq[i]; 2836 2837 stats_base = (u8 *)&sq->stats; 2838 do { 2839 start = u64_stats_fetch_begin(&sq->stats.syncp); 2840 for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) { 2841 offset = virtnet_sq_stats_desc[j].offset; 2842 data[idx + j] = *(u64 *)(stats_base + offset); 2843 } 2844 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 2845 idx += VIRTNET_SQ_STATS_LEN; 2846 } 2847 } 2848 2849 static void virtnet_get_channels(struct net_device *dev, 2850 struct ethtool_channels *channels) 2851 { 2852 struct virtnet_info *vi = netdev_priv(dev); 2853 2854 channels->combined_count = vi->curr_queue_pairs; 2855 channels->max_combined = vi->max_queue_pairs; 2856 channels->max_other = 0; 2857 channels->rx_count = 0; 2858 channels->tx_count = 0; 2859 channels->other_count = 0; 2860 } 2861 2862 static int virtnet_set_link_ksettings(struct net_device *dev, 2863 const struct ethtool_link_ksettings *cmd) 2864 { 2865 struct virtnet_info *vi = netdev_priv(dev); 2866 2867 return ethtool_virtdev_set_link_ksettings(dev, cmd, 2868 &vi->speed, &vi->duplex); 2869 } 2870 2871 static int virtnet_get_link_ksettings(struct net_device *dev, 2872 struct ethtool_link_ksettings *cmd) 2873 { 2874 struct virtnet_info *vi = netdev_priv(dev); 2875 2876 cmd->base.speed = vi->speed; 2877 cmd->base.duplex = vi->duplex; 2878 cmd->base.port = PORT_OTHER; 2879 2880 return 0; 2881 } 2882 
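/* Notification coalescing (VIRTIO_NET_F_NOTF_COAL): the helpers below push the ethtool tx/rx usecs and max-frames parameters to the device over the control virtqueue and cache them in vi so that virtnet_get_coalesce() can report them back. Without the feature, only tx-frames 0/1 (which toggles the TX napi weight) and rx-frames 1 are accepted. */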
2883 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, 2884 struct ethtool_coalesce *ec) 2885 { 2886 struct scatterlist sgs_tx, sgs_rx; 2887 struct virtio_net_ctrl_coal_tx coal_tx; 2888 struct virtio_net_ctrl_coal_rx coal_rx; 2889 2890 coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 2891 coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 2892 sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx)); 2893 2894 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 2895 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 2896 &sgs_tx)) 2897 return -EINVAL; 2898 2899 /* Save parameters */ 2900 vi->tx_usecs = ec->tx_coalesce_usecs; 2901 vi->tx_max_packets = ec->tx_max_coalesced_frames; 2902 2903 coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); 2904 coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); 2905 sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx)); 2906 2907 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 2908 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, 2909 &sgs_rx)) 2910 return -EINVAL; 2911 2912 /* Save parameters */ 2913 vi->rx_usecs = ec->rx_coalesce_usecs; 2914 vi->rx_max_packets = ec->rx_max_coalesced_frames; 2915 2916 return 0; 2917 } 2918 2919 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 2920 { 2921 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 2922 * feature is negotiated. 2923 */ 2924 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 2925 return -EOPNOTSUPP; 2926 2927 if (ec->tx_max_coalesced_frames > 1 || 2928 ec->rx_max_coalesced_frames != 1) 2929 return -EINVAL; 2930 2931 return 0; 2932 } 2933 2934 static int virtnet_set_coalesce(struct net_device *dev, 2935 struct ethtool_coalesce *ec, 2936 struct kernel_ethtool_coalesce *kernel_coal, 2937 struct netlink_ext_ack *extack) 2938 { 2939 struct virtnet_info *vi = netdev_priv(dev); 2940 int ret, i, napi_weight; 2941 bool update_napi = false; 2942 2943 /* Can't change NAPI weight if the link is up */ 2944 napi_weight = ec->tx_max_coalesced_frames ? 
NAPI_POLL_WEIGHT : 0; 2945 if (napi_weight ^ vi->sq[0].napi.weight) { 2946 if (dev->flags & IFF_UP) 2947 return -EBUSY; 2948 else 2949 update_napi = true; 2950 } 2951 2952 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 2953 ret = virtnet_send_notf_coal_cmds(vi, ec); 2954 else 2955 ret = virtnet_coal_params_supported(ec); 2956 2957 if (ret) 2958 return ret; 2959 2960 if (update_napi) { 2961 for (i = 0; i < vi->max_queue_pairs; i++) 2962 vi->sq[i].napi.weight = napi_weight; 2963 } 2964 2965 return ret; 2966 } 2967 2968 static int virtnet_get_coalesce(struct net_device *dev, 2969 struct ethtool_coalesce *ec, 2970 struct kernel_ethtool_coalesce *kernel_coal, 2971 struct netlink_ext_ack *extack) 2972 { 2973 struct virtnet_info *vi = netdev_priv(dev); 2974 2975 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 2976 ec->rx_coalesce_usecs = vi->rx_usecs; 2977 ec->tx_coalesce_usecs = vi->tx_usecs; 2978 ec->tx_max_coalesced_frames = vi->tx_max_packets; 2979 ec->rx_max_coalesced_frames = vi->rx_max_packets; 2980 } else { 2981 ec->rx_max_coalesced_frames = 1; 2982 2983 if (vi->sq[0].napi.weight) 2984 ec->tx_max_coalesced_frames = 1; 2985 } 2986 2987 return 0; 2988 } 2989 2990 static void virtnet_init_settings(struct net_device *dev) 2991 { 2992 struct virtnet_info *vi = netdev_priv(dev); 2993 2994 vi->speed = SPEED_UNKNOWN; 2995 vi->duplex = DUPLEX_UNKNOWN; 2996 } 2997 2998 static void virtnet_update_settings(struct virtnet_info *vi) 2999 { 3000 u32 speed; 3001 u8 duplex; 3002 3003 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 3004 return; 3005 3006 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 3007 3008 if (ethtool_validate_speed(speed)) 3009 vi->speed = speed; 3010 3011 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); 3012 3013 if (ethtool_validate_duplex(duplex)) 3014 vi->duplex = duplex; 3015 } 3016 3017 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 3018 { 3019 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 3020 } 3021 3022 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 3023 { 3024 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 3025 } 3026 3027 static int virtnet_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfunc) 3028 { 3029 struct virtnet_info *vi = netdev_priv(dev); 3030 int i; 3031 3032 if (indir) { 3033 for (i = 0; i < vi->rss_indir_table_size; ++i) 3034 indir[i] = vi->ctrl->rss.indirection_table[i]; 3035 } 3036 3037 if (key) 3038 memcpy(key, vi->ctrl->rss.key, vi->rss_key_size); 3039 3040 if (hfunc) 3041 *hfunc = ETH_RSS_HASH_TOP; 3042 3043 return 0; 3044 } 3045 3046 static int virtnet_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) 3047 { 3048 struct virtnet_info *vi = netdev_priv(dev); 3049 int i; 3050 3051 if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) 3052 return -EOPNOTSUPP; 3053 3054 if (indir) { 3055 for (i = 0; i < vi->rss_indir_table_size; ++i) 3056 vi->ctrl->rss.indirection_table[i] = indir[i]; 3057 } 3058 if (key) 3059 memcpy(vi->ctrl->rss.key, key, vi->rss_key_size); 3060 3061 virtnet_commit_rss_command(vi); 3062 3063 return 0; 3064 } 3065 3066 static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) 3067 { 3068 struct virtnet_info *vi = netdev_priv(dev); 3069 int rc = 0; 3070 3071 switch (info->cmd) { 3072 case ETHTOOL_GRXRINGS: 3073 info->data = vi->curr_queue_pairs; 3074 break; 3075 case ETHTOOL_GRXFH: 3076 
virtnet_get_hashflow(vi, info); 3077 break; 3078 default: 3079 rc = -EOPNOTSUPP; 3080 } 3081 3082 return rc; 3083 } 3084 3085 static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) 3086 { 3087 struct virtnet_info *vi = netdev_priv(dev); 3088 int rc = 0; 3089 3090 switch (info->cmd) { 3091 case ETHTOOL_SRXFH: 3092 if (!virtnet_set_hashflow(vi, info)) 3093 rc = -EINVAL; 3094 3095 break; 3096 default: 3097 rc = -EOPNOTSUPP; 3098 } 3099 3100 return rc; 3101 } 3102 3103 static const struct ethtool_ops virtnet_ethtool_ops = { 3104 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 3105 ETHTOOL_COALESCE_USECS, 3106 .get_drvinfo = virtnet_get_drvinfo, 3107 .get_link = ethtool_op_get_link, 3108 .get_ringparam = virtnet_get_ringparam, 3109 .set_ringparam = virtnet_set_ringparam, 3110 .get_strings = virtnet_get_strings, 3111 .get_sset_count = virtnet_get_sset_count, 3112 .get_ethtool_stats = virtnet_get_ethtool_stats, 3113 .set_channels = virtnet_set_channels, 3114 .get_channels = virtnet_get_channels, 3115 .get_ts_info = ethtool_op_get_ts_info, 3116 .get_link_ksettings = virtnet_get_link_ksettings, 3117 .set_link_ksettings = virtnet_set_link_ksettings, 3118 .set_coalesce = virtnet_set_coalesce, 3119 .get_coalesce = virtnet_get_coalesce, 3120 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 3121 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 3122 .get_rxfh = virtnet_get_rxfh, 3123 .set_rxfh = virtnet_set_rxfh, 3124 .get_rxnfc = virtnet_get_rxnfc, 3125 .set_rxnfc = virtnet_set_rxnfc, 3126 }; 3127 3128 static void virtnet_freeze_down(struct virtio_device *vdev) 3129 { 3130 struct virtnet_info *vi = vdev->priv; 3131 3132 /* Make sure no work handler is accessing the device */ 3133 flush_work(&vi->config_work); 3134 3135 netif_tx_lock_bh(vi->dev); 3136 netif_device_detach(vi->dev); 3137 netif_tx_unlock_bh(vi->dev); 3138 if (netif_running(vi->dev)) 3139 virtnet_close(vi->dev); 3140 } 3141 3142 static int init_vqs(struct virtnet_info *vi); 3143 3144 static int virtnet_restore_up(struct virtio_device *vdev) 3145 { 3146 struct virtnet_info *vi = vdev->priv; 3147 int err; 3148 3149 err = init_vqs(vi); 3150 if (err) 3151 return err; 3152 3153 virtio_device_ready(vdev); 3154 3155 enable_delayed_refill(vi); 3156 3157 if (netif_running(vi->dev)) { 3158 err = virtnet_open(vi->dev); 3159 if (err) 3160 return err; 3161 } 3162 3163 netif_tx_lock_bh(vi->dev); 3164 netif_device_attach(vi->dev); 3165 netif_tx_unlock_bh(vi->dev); 3166 return err; 3167 } 3168 3169 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) 3170 { 3171 struct scatterlist sg; 3172 vi->ctrl->offloads = cpu_to_virtio64(vi->vdev, offloads); 3173 3174 sg_init_one(&sg, &vi->ctrl->offloads, sizeof(vi->ctrl->offloads)); 3175 3176 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 3177 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 3178 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); 3179 return -EINVAL; 3180 } 3181 3182 return 0; 3183 } 3184 3185 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 3186 { 3187 u64 offloads = 0; 3188 3189 if (!vi->guest_offloads) 3190 return 0; 3191 3192 return virtnet_set_guest_offloads(vi, offloads); 3193 } 3194 3195 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 3196 { 3197 u64 offloads = vi->guest_offloads; 3198 3199 if (!vi->guest_offloads) 3200 return 0; 3201 3202 return virtnet_set_guest_offloads(vi, offloads); 3203 } 3204 3205 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 3206 
struct netlink_ext_ack *extack) 3207 { 3208 unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + 3209 sizeof(struct skb_shared_info)); 3210 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 3211 struct virtnet_info *vi = netdev_priv(dev); 3212 struct bpf_prog *old_prog; 3213 u16 xdp_qp = 0, curr_qp; 3214 int i, err; 3215 3216 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 3217 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 3218 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 3219 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 3220 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 3221 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 3222 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 3223 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 3224 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); 3225 return -EOPNOTSUPP; 3226 } 3227 3228 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 3229 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); 3230 return -EINVAL; 3231 } 3232 3233 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 3234 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 3235 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 3236 return -EINVAL; 3237 } 3238 3239 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 3240 if (prog) 3241 xdp_qp = nr_cpu_ids; 3242 3243 /* XDP requires extra queues for XDP_TX */ 3244 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 3245 netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 3246 curr_qp + xdp_qp, vi->max_queue_pairs); 3247 xdp_qp = 0; 3248 } 3249 3250 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 3251 if (!prog && !old_prog) 3252 return 0; 3253 3254 if (prog) 3255 bpf_prog_add(prog, vi->max_queue_pairs - 1); 3256 3257 /* Make sure NAPI is not using any XDP TX queues for RX. 
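* (Both the RX and TX napi instances are quiesced below before the queue count and the per-queue xdp_prog pointers change, so no napi handler can run against a half-reconfigured queue set.)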
*/ 3258 if (netif_running(dev)) { 3259 for (i = 0; i < vi->max_queue_pairs; i++) { 3260 napi_disable(&vi->rq[i].napi); 3261 virtnet_napi_tx_disable(&vi->sq[i].napi); 3262 } 3263 } 3264 3265 if (!prog) { 3266 for (i = 0; i < vi->max_queue_pairs; i++) { 3267 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 3268 if (i == 0) 3269 virtnet_restore_guest_offloads(vi); 3270 } 3271 synchronize_net(); 3272 } 3273 3274 err = _virtnet_set_queues(vi, curr_qp + xdp_qp); 3275 if (err) 3276 goto err; 3277 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 3278 vi->xdp_queue_pairs = xdp_qp; 3279 3280 if (prog) { 3281 vi->xdp_enabled = true; 3282 for (i = 0; i < vi->max_queue_pairs; i++) { 3283 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 3284 if (i == 0 && !old_prog) 3285 virtnet_clear_guest_offloads(vi); 3286 } 3287 if (!old_prog) 3288 xdp_features_set_redirect_target(dev, true); 3289 } else { 3290 xdp_features_clear_redirect_target(dev); 3291 vi->xdp_enabled = false; 3292 } 3293 3294 for (i = 0; i < vi->max_queue_pairs; i++) { 3295 if (old_prog) 3296 bpf_prog_put(old_prog); 3297 if (netif_running(dev)) { 3298 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 3299 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 3300 &vi->sq[i].napi); 3301 } 3302 } 3303 3304 return 0; 3305 3306 err: 3307 if (!prog) { 3308 virtnet_clear_guest_offloads(vi); 3309 for (i = 0; i < vi->max_queue_pairs; i++) 3310 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 3311 } 3312 3313 if (netif_running(dev)) { 3314 for (i = 0; i < vi->max_queue_pairs; i++) { 3315 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 3316 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 3317 &vi->sq[i].napi); 3318 } 3319 } 3320 if (prog) 3321 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 3322 return err; 3323 } 3324 3325 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 3326 { 3327 switch (xdp->command) { 3328 case XDP_SETUP_PROG: 3329 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 3330 default: 3331 return -EINVAL; 3332 } 3333 } 3334 3335 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 3336 size_t len) 3337 { 3338 struct virtnet_info *vi = netdev_priv(dev); 3339 int ret; 3340 3341 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3342 return -EOPNOTSUPP; 3343 3344 ret = snprintf(buf, len, "sby"); 3345 if (ret >= len) 3346 return -EOPNOTSUPP; 3347 3348 return 0; 3349 } 3350 3351 static int virtnet_set_features(struct net_device *dev, 3352 netdev_features_t features) 3353 { 3354 struct virtnet_info *vi = netdev_priv(dev); 3355 u64 offloads; 3356 int err; 3357 3358 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 3359 if (vi->xdp_enabled) 3360 return -EBUSY; 3361 3362 if (features & NETIF_F_GRO_HW) 3363 offloads = vi->guest_offloads_capable; 3364 else 3365 offloads = vi->guest_offloads_capable & 3366 ~GUEST_OFFLOAD_GRO_HW_MASK; 3367 3368 err = virtnet_set_guest_offloads(vi, offloads); 3369 if (err) 3370 return err; 3371 vi->guest_offloads = offloads; 3372 } 3373 3374 if ((dev->features ^ features) & NETIF_F_RXHASH) { 3375 if (features & NETIF_F_RXHASH) 3376 vi->ctrl->rss.hash_types = vi->rss_hash_types_saved; 3377 else 3378 vi->ctrl->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; 3379 3380 if (!virtnet_commit_rss_command(vi)) 3381 return -EINVAL; 3382 } 3383 3384 return 0; 3385 } 3386 3387 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 3388 { 3389 struct virtnet_info *priv = netdev_priv(dev); 3390 struct send_queue *sq = &priv->sq[txqueue]; 3391 struct netdev_queue *txq = 
netdev_get_tx_queue(dev, txqueue); 3392 3393 u64_stats_update_begin(&sq->stats.syncp); 3394 sq->stats.tx_timeouts++; 3395 u64_stats_update_end(&sq->stats.syncp); 3396 3397 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 3398 txqueue, sq->name, sq->vq->index, sq->vq->name, 3399 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 3400 } 3401 3402 static const struct net_device_ops virtnet_netdev = { 3403 .ndo_open = virtnet_open, 3404 .ndo_stop = virtnet_close, 3405 .ndo_start_xmit = start_xmit, 3406 .ndo_validate_addr = eth_validate_addr, 3407 .ndo_set_mac_address = virtnet_set_mac_address, 3408 .ndo_set_rx_mode = virtnet_set_rx_mode, 3409 .ndo_get_stats64 = virtnet_stats, 3410 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 3411 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 3412 .ndo_bpf = virtnet_xdp, 3413 .ndo_xdp_xmit = virtnet_xdp_xmit, 3414 .ndo_features_check = passthru_features_check, 3415 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 3416 .ndo_set_features = virtnet_set_features, 3417 .ndo_tx_timeout = virtnet_tx_timeout, 3418 }; 3419 3420 static void virtnet_config_changed_work(struct work_struct *work) 3421 { 3422 struct virtnet_info *vi = 3423 container_of(work, struct virtnet_info, config_work); 3424 u16 v; 3425 3426 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 3427 struct virtio_net_config, status, &v) < 0) 3428 return; 3429 3430 if (v & VIRTIO_NET_S_ANNOUNCE) { 3431 netdev_notify_peers(vi->dev); 3432 virtnet_ack_link_announce(vi); 3433 } 3434 3435 /* Ignore unknown (future) status bits */ 3436 v &= VIRTIO_NET_S_LINK_UP; 3437 3438 if (vi->status == v) 3439 return; 3440 3441 vi->status = v; 3442 3443 if (vi->status & VIRTIO_NET_S_LINK_UP) { 3444 virtnet_update_settings(vi); 3445 netif_carrier_on(vi->dev); 3446 netif_tx_wake_all_queues(vi->dev); 3447 } else { 3448 netif_carrier_off(vi->dev); 3449 netif_tx_stop_all_queues(vi->dev); 3450 } 3451 } 3452 3453 static void virtnet_config_changed(struct virtio_device *vdev) 3454 { 3455 struct virtnet_info *vi = vdev->priv; 3456 3457 schedule_work(&vi->config_work); 3458 } 3459 3460 static void virtnet_free_queues(struct virtnet_info *vi) 3461 { 3462 int i; 3463 3464 for (i = 0; i < vi->max_queue_pairs; i++) { 3465 __netif_napi_del(&vi->rq[i].napi); 3466 __netif_napi_del(&vi->sq[i].napi); 3467 } 3468 3469 /* We called __netif_napi_del(), 3470 * we need to respect an RCU grace period before freeing vi->rq 3471 */ 3472 synchronize_net(); 3473 3474 kfree(vi->rq); 3475 kfree(vi->sq); 3476 kfree(vi->ctrl); 3477 } 3478 3479 static void _free_receive_bufs(struct virtnet_info *vi) 3480 { 3481 struct bpf_prog *old_prog; 3482 int i; 3483 3484 for (i = 0; i < vi->max_queue_pairs; i++) { 3485 while (vi->rq[i].pages) 3486 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 3487 3488 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 3489 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 3490 if (old_prog) 3491 bpf_prog_put(old_prog); 3492 } 3493 } 3494 3495 static void free_receive_bufs(struct virtnet_info *vi) 3496 { 3497 rtnl_lock(); 3498 _free_receive_bufs(vi); 3499 rtnl_unlock(); 3500 } 3501 3502 static void free_receive_page_frags(struct virtnet_info *vi) 3503 { 3504 int i; 3505 for (i = 0; i < vi->max_queue_pairs; i++) 3506 if (vi->rq[i].alloc_frag.page) 3507 put_page(vi->rq[i].alloc_frag.page); 3508 } 3509 3510 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) 3511 { 3512 if (!is_xdp_frame(buf)) 3513 dev_kfree_skb(buf); 3514 else 3515 
xdp_return_frame(ptr_to_xdp(buf)); 3516 } 3517 3518 static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf) 3519 { 3520 struct virtnet_info *vi = vq->vdev->priv; 3521 int i = vq2rxq(vq); 3522 3523 if (vi->mergeable_rx_bufs) 3524 put_page(virt_to_head_page(buf)); 3525 else if (vi->big_packets) 3526 give_pages(&vi->rq[i], buf); 3527 else 3528 put_page(virt_to_head_page(buf)); 3529 } 3530 3531 static void free_unused_bufs(struct virtnet_info *vi) 3532 { 3533 void *buf; 3534 int i; 3535 3536 for (i = 0; i < vi->max_queue_pairs; i++) { 3537 struct virtqueue *vq = vi->sq[i].vq; 3538 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 3539 virtnet_sq_free_unused_buf(vq, buf); 3540 } 3541 3542 for (i = 0; i < vi->max_queue_pairs; i++) { 3543 struct virtqueue *vq = vi->rq[i].vq; 3544 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 3545 virtnet_rq_free_unused_buf(vq, buf); 3546 } 3547 } 3548 3549 static void virtnet_del_vqs(struct virtnet_info *vi) 3550 { 3551 struct virtio_device *vdev = vi->vdev; 3552 3553 virtnet_clean_affinity(vi); 3554 3555 vdev->config->del_vqs(vdev); 3556 3557 virtnet_free_queues(vi); 3558 } 3559 3560 /* How large should a single buffer be so a queue full of these can fit at 3561 * least one full packet? 3562 * Logic below assumes the mergeable buffer header is used. 3563 */ 3564 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 3565 { 3566 const unsigned int hdr_len = vi->hdr_len; 3567 unsigned int rq_size = virtqueue_get_vring_size(vq); 3568 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 3569 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 3570 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 3571 3572 return max(max(min_buf_len, hdr_len) - hdr_len, 3573 (unsigned int)GOOD_PACKET_LEN); 3574 } 3575 3576 static int virtnet_find_vqs(struct virtnet_info *vi) 3577 { 3578 vq_callback_t **callbacks; 3579 struct virtqueue **vqs; 3580 int ret = -ENOMEM; 3581 int i, total_vqs; 3582 const char **names; 3583 bool *ctx; 3584 3585 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 3586 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 3587 * possible control vq. 
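* (The receive vq of pair i therefore sits at index rxq2vq(i) and the transmit vq at txq2vq(i), and the control vq, when present, is always the last entry; see the vqs[] indexing after virtio_find_vqs_ctx() below.)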
3588 */ 3589 total_vqs = vi->max_queue_pairs * 2 + 3590 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 3591 3592 /* Allocate space for find_vqs parameters */ 3593 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 3594 if (!vqs) 3595 goto err_vq; 3596 callbacks = kmalloc_array(total_vqs, sizeof(*callbacks), GFP_KERNEL); 3597 if (!callbacks) 3598 goto err_callback; 3599 names = kmalloc_array(total_vqs, sizeof(*names), GFP_KERNEL); 3600 if (!names) 3601 goto err_names; 3602 if (!vi->big_packets || vi->mergeable_rx_bufs) { 3603 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 3604 if (!ctx) 3605 goto err_ctx; 3606 } else { 3607 ctx = NULL; 3608 } 3609 3610 /* Parameters for control virtqueue, if any */ 3611 if (vi->has_cvq) { 3612 callbacks[total_vqs - 1] = NULL; 3613 names[total_vqs - 1] = "control"; 3614 } 3615 3616 /* Allocate/initialize parameters for send/receive virtqueues */ 3617 for (i = 0; i < vi->max_queue_pairs; i++) { 3618 callbacks[rxq2vq(i)] = skb_recv_done; 3619 callbacks[txq2vq(i)] = skb_xmit_done; 3620 sprintf(vi->rq[i].name, "input.%d", i); 3621 sprintf(vi->sq[i].name, "output.%d", i); 3622 names[rxq2vq(i)] = vi->rq[i].name; 3623 names[txq2vq(i)] = vi->sq[i].name; 3624 if (ctx) 3625 ctx[rxq2vq(i)] = true; 3626 } 3627 3628 ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks, 3629 names, ctx, NULL); 3630 if (ret) 3631 goto err_find; 3632 3633 if (vi->has_cvq) { 3634 vi->cvq = vqs[total_vqs - 1]; 3635 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 3636 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 3637 } 3638 3639 for (i = 0; i < vi->max_queue_pairs; i++) { 3640 vi->rq[i].vq = vqs[rxq2vq(i)]; 3641 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 3642 vi->sq[i].vq = vqs[txq2vq(i)]; 3643 } 3644 3645 /* run here: ret == 0. */ 3646 3647 3648 err_find: 3649 kfree(ctx); 3650 err_ctx: 3651 kfree(names); 3652 err_names: 3653 kfree(callbacks); 3654 err_callback: 3655 kfree(vqs); 3656 err_vq: 3657 return ret; 3658 } 3659 3660 static int virtnet_alloc_queues(struct virtnet_info *vi) 3661 { 3662 int i; 3663 3664 if (vi->has_cvq) { 3665 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 3666 if (!vi->ctrl) 3667 goto err_ctrl; 3668 } else { 3669 vi->ctrl = NULL; 3670 } 3671 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 3672 if (!vi->sq) 3673 goto err_sq; 3674 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 3675 if (!vi->rq) 3676 goto err_rq; 3677 3678 INIT_DELAYED_WORK(&vi->refill, refill_work); 3679 for (i = 0; i < vi->max_queue_pairs; i++) { 3680 vi->rq[i].pages = NULL; 3681 netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll, 3682 napi_weight); 3683 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 3684 virtnet_poll_tx, 3685 napi_tx ? 
napi_weight : 0); 3686 3687 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 3688 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 3689 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 3690 3691 u64_stats_init(&vi->rq[i].stats.syncp); 3692 u64_stats_init(&vi->sq[i].stats.syncp); 3693 } 3694 3695 return 0; 3696 3697 err_rq: 3698 kfree(vi->sq); 3699 err_sq: 3700 kfree(vi->ctrl); 3701 err_ctrl: 3702 return -ENOMEM; 3703 } 3704 3705 static int init_vqs(struct virtnet_info *vi) 3706 { 3707 int ret; 3708 3709 /* Allocate send & receive queues */ 3710 ret = virtnet_alloc_queues(vi); 3711 if (ret) 3712 goto err; 3713 3714 ret = virtnet_find_vqs(vi); 3715 if (ret) 3716 goto err_free; 3717 3718 cpus_read_lock(); 3719 virtnet_set_affinity(vi); 3720 cpus_read_unlock(); 3721 3722 return 0; 3723 3724 err_free: 3725 virtnet_free_queues(vi); 3726 err: 3727 return ret; 3728 } 3729 3730 #ifdef CONFIG_SYSFS 3731 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 3732 char *buf) 3733 { 3734 struct virtnet_info *vi = netdev_priv(queue->dev); 3735 unsigned int queue_index = get_netdev_rx_queue_index(queue); 3736 unsigned int headroom = virtnet_get_headroom(vi); 3737 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 3738 struct ewma_pkt_len *avg; 3739 3740 BUG_ON(queue_index >= vi->max_queue_pairs); 3741 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 3742 return sprintf(buf, "%u\n", 3743 get_mergeable_buf_len(&vi->rq[queue_index], avg, 3744 SKB_DATA_ALIGN(headroom + tailroom))); 3745 } 3746 3747 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 3748 __ATTR_RO(mergeable_rx_buffer_size); 3749 3750 static struct attribute *virtio_net_mrg_rx_attrs[] = { 3751 &mergeable_rx_buffer_size_attribute.attr, 3752 NULL 3753 }; 3754 3755 static const struct attribute_group virtio_net_mrg_rx_group = { 3756 .name = "virtio_net", 3757 .attrs = virtio_net_mrg_rx_attrs 3758 }; 3759 #endif 3760 3761 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 3762 unsigned int fbit, 3763 const char *fname, const char *dname) 3764 { 3765 if (!virtio_has_feature(vdev, fbit)) 3766 return false; 3767 3768 dev_err(&vdev->dev, "device advertises feature %s but not %s", 3769 fname, dname); 3770 3771 return true; 3772 } 3773 3774 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 3775 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 3776 3777 static bool virtnet_validate_features(struct virtio_device *vdev) 3778 { 3779 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 3780 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 3781 "VIRTIO_NET_F_CTRL_VQ") || 3782 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 3783 "VIRTIO_NET_F_CTRL_VQ") || 3784 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 3785 "VIRTIO_NET_F_CTRL_VQ") || 3786 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 3787 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 3788 "VIRTIO_NET_F_CTRL_VQ") || 3789 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 3790 "VIRTIO_NET_F_CTRL_VQ") || 3791 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 3792 "VIRTIO_NET_F_CTRL_VQ") || 3793 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 3794 "VIRTIO_NET_F_CTRL_VQ"))) { 3795 return false; 3796 } 3797 3798 return true; 3799 } 3800 3801 #define MIN_MTU ETH_MIN_MTU 3802 #define MAX_MTU ETH_MAX_MTU 3803 3804 static int virtnet_validate(struct virtio_device *vdev) 3805 { 3806 if (!vdev->config->get) { 3807 dev_err(&vdev->dev, "%s failure: config access disabled\n", 3808 __func__); 3809 return -EINVAL; 3810 } 3811 3812 if 
(!virtnet_validate_features(vdev)) 3813 return -EINVAL; 3814 3815 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 3816 int mtu = virtio_cread16(vdev, 3817 offsetof(struct virtio_net_config, 3818 mtu)); 3819 if (mtu < MIN_MTU) 3820 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 3821 } 3822 3823 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && 3824 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 3825 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); 3826 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); 3827 } 3828 3829 return 0; 3830 } 3831 3832 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 3833 { 3834 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 3835 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 3836 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 3837 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 3838 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 3839 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 3840 } 3841 3842 static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) 3843 { 3844 bool guest_gso = virtnet_check_guest_gso(vi); 3845 3846 /* If device can receive ANY guest GSO packets, regardless of mtu, 3847 * allocate packets of maximum size, otherwise limit them to 3848 * mtu-sized packets only. 3849 */ 3850 if (mtu > ETH_DATA_LEN || guest_gso) { 3851 vi->big_packets = true; 3852 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); 3853 } 3854 } 3855 3856 static int virtnet_probe(struct virtio_device *vdev) 3857 { 3858 int i, err = -ENOMEM; 3859 struct net_device *dev; 3860 struct virtnet_info *vi; 3861 u16 max_queue_pairs; 3862 int mtu = 0; 3863 3864 /* Find if host supports multiqueue/rss virtio_net device */ 3865 max_queue_pairs = 1; 3866 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 3867 max_queue_pairs = 3868 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); 3869 3870 /* We need at least 2 queues */ 3871 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 3872 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 3873 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 3874 max_queue_pairs = 1; 3875 3876 /* Allocate ourselves a network device with room for our info */ 3877 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); 3878 if (!dev) 3879 return -ENOMEM; 3880 3881 /* Set up network device as normal. */ 3882 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | 3883 IFF_TX_SKB_NO_LINEAR; 3884 dev->netdev_ops = &virtnet_netdev; 3885 dev->features = NETIF_F_HIGHDMA; 3886 3887 dev->ethtool_ops = &virtnet_ethtool_ops; 3888 SET_NETDEV_DEV(dev, &vdev->dev); 3889 3890 /* Do we support "hardware" checksums? */ 3891 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 3892 /* This opens up the world of extra features. */ 3893 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 3894 if (csum) 3895 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 3896 3897 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 3898 dev->hw_features |= NETIF_F_TSO 3899 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 3900 } 3901 /* Individual feature bits: what can host handle?
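* (Each host offload bit negotiated below enables the matching NETIF_F_* flag in hw_features; whether it also ends up in dev->features depends on the csum and gso module parameters checked in this block.)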

static bool virtnet_check_guest_gso(const struct virtnet_info *vi)
{
	return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
		(virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) &&
		 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6));
}

static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu)
{
	bool guest_gso = virtnet_check_guest_gso(vi);

	/* If we can receive ANY guest GSO packets, regardless of MTU,
	 * allocate packets of maximum size; otherwise limit them to
	 * MTU-sized packets only.
	 */
	if (mtu > ETH_DATA_LEN || guest_gso) {
		vi->big_packets = true;
		vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE);
	}
}
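
/* Bring up one virtio-net device: read the negotiated features and config
 * space, size the queue pairs, allocate and register the netdev, and only
 * then mark the virtio device ready so the control virtqueue can be used
 * (for example, to push a randomly generated MAC back to the device).
 */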

static int virtnet_probe(struct virtio_device *vdev)
{
	int i, err = -ENOMEM;
	struct net_device *dev;
	struct virtnet_info *vi;
	u16 max_queue_pairs;
	int mtu = 0;

	/* Find if host supports multiqueue/rss virtio_net device */
	max_queue_pairs = 1;
	if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
		max_queue_pairs =
		     virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs));

	/* We need at least 2 queues */
	if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
	    max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
	    !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
		max_queue_pairs = 1;

	/* Allocate ourselves a network device with room for our info */
	dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
	if (!dev)
		return -ENOMEM;

	/* Set up network device as normal. */
	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE |
			   IFF_TX_SKB_NO_LINEAR;
	dev->netdev_ops = &virtnet_netdev;
	dev->features = NETIF_F_HIGHDMA;

	dev->ethtool_ops = &virtnet_ethtool_ops;
	SET_NETDEV_DEV(dev, &vdev->dev);

	/* Do we support "hardware" checksums? */
	if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
		/* This opens up the world of extra features. */
		dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
		if (csum)
			dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;

		if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
			dev->hw_features |= NETIF_F_TSO
				| NETIF_F_TSO_ECN | NETIF_F_TSO6;
		}
		/* Individual feature bits: what can host handle? */
		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
			dev->hw_features |= NETIF_F_TSO;
		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
			dev->hw_features |= NETIF_F_TSO6;
		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
			dev->hw_features |= NETIF_F_TSO_ECN;
		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO))
			dev->hw_features |= NETIF_F_GSO_UDP_L4;

		dev->features |= NETIF_F_GSO_ROBUST;

		if (gso)
			dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
		/* (!csum && gso) case will be fixed by register_netdev() */
	}
	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
		dev->features |= NETIF_F_RXCSUM;
	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
		dev->features |= NETIF_F_GRO_HW;
	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
		dev->hw_features |= NETIF_F_GRO_HW;

	dev->vlan_features = dev->features;
	dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;

	/* MTU range: 68 - 65535 */
	dev->min_mtu = MIN_MTU;
	dev->max_mtu = MAX_MTU;

	/* Configuration may specify what MAC to use.  Otherwise random. */
	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
		u8 addr[ETH_ALEN];

		virtio_cread_bytes(vdev,
				   offsetof(struct virtio_net_config, mac),
				   addr, ETH_ALEN);
		eth_hw_addr_set(dev, addr);
	} else {
		eth_hw_addr_random(dev);
		dev_info(&vdev->dev, "Assigned random MAC address %pM\n",
			 dev->dev_addr);
	}

	/* Set up our device-specific information */
	vi = netdev_priv(dev);
	vi->dev = dev;
	vi->vdev = vdev;
	vdev->priv = vi;

	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
	spin_lock_init(&vi->refill_lock);

	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
		vi->mergeable_rx_bufs = true;
		dev->xdp_features |= NETDEV_XDP_ACT_RX_SG;
	}

	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
		vi->rx_usecs = 0;
		vi->tx_usecs = 0;
		vi->tx_max_packets = 0;
		vi->rx_max_packets = 0;
	}

	if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
		vi->has_rss_hash_report = true;

	if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
		vi->has_rss = true;

	if (vi->has_rss || vi->has_rss_hash_report) {
		vi->rss_indir_table_size =
			virtio_cread16(vdev, offsetof(struct virtio_net_config,
				rss_max_indirection_table_length));
		vi->rss_key_size =
			virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));

		vi->rss_hash_types_supported =
		    virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types));
		vi->rss_hash_types_supported &=
				~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX |
				  VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
				  VIRTIO_NET_RSS_HASH_TYPE_UDP_EX);

		dev->hw_features |= NETIF_F_RXHASH;
	}

	if (vi->has_rss_hash_report)
		vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);
	else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
		 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
		vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	else
		vi->hdr_len = sizeof(struct virtio_net_hdr);

	if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
	    virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
		vi->any_header_sg = true;

	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
		vi->has_cvq = true;

	if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
		mtu = virtio_cread16(vdev,
				     offsetof(struct virtio_net_config,
					      mtu));
		if (mtu < dev->min_mtu) {
			/* Should never trigger: MTU was previously validated
			 * in virtnet_validate.
			 */
			dev_err(&vdev->dev,
				"device MTU appears to have changed, it is now %d < %d",
				mtu, dev->min_mtu);
			err = -EINVAL;
			goto free;
		}

		dev->mtu = mtu;
		dev->max_mtu = mtu;
	}

	virtnet_set_big_packets(vi, mtu);

	if (vi->any_header_sg)
		dev->needed_headroom = vi->hdr_len;

	/* Enable multiqueue by default */
	if (num_online_cpus() >= max_queue_pairs)
		vi->curr_queue_pairs = max_queue_pairs;
	else
		vi->curr_queue_pairs = num_online_cpus();
	vi->max_queue_pairs = max_queue_pairs;

	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
	err = init_vqs(vi);
	if (err)
		goto free;

#ifdef CONFIG_SYSFS
	if (vi->mergeable_rx_bufs)
		dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
#endif
	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);

	virtnet_init_settings(dev);

	if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
		vi->failover = net_failover_create(vi->dev);
		if (IS_ERR(vi->failover)) {
			err = PTR_ERR(vi->failover);
			goto free_vqs;
		}
	}

	if (vi->has_rss || vi->has_rss_hash_report)
		virtnet_init_default_rss(vi);

	/* serialize netdev register + virtio_device_ready() with ndo_open() */
	rtnl_lock();

	err = register_netdevice(dev);
	if (err) {
		pr_debug("virtio_net: registering device failed\n");
		rtnl_unlock();
		goto free_failover;
	}

	virtio_device_ready(vdev);

	/* A random MAC address has been assigned; notify the device.
	 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there
	 * because many devices work fine without getting the MAC explicitly.
	 */
	if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
		struct scatterlist sg;

		sg_init_one(&sg, dev->dev_addr, dev->addr_len);
		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
					  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
			pr_debug("virtio_net: setting MAC address failed\n");
			rtnl_unlock();
			err = -EINVAL;
			goto free_unregister_netdev;
		}
	}

	rtnl_unlock();

	err = virtnet_cpu_notif_add(vi);
	if (err) {
		pr_debug("virtio_net: registering cpu notifier failed\n");
		goto free_unregister_netdev;
	}

	virtnet_set_queues(vi, vi->curr_queue_pairs);

	/* Assume link up if device can't report link status,
	   otherwise get link status from config. */
	netif_carrier_off(dev);
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
		schedule_work(&vi->config_work);
	} else {
		vi->status = VIRTIO_NET_S_LINK_UP;
		virtnet_update_settings(vi);
		netif_carrier_on(dev);
	}

	for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
		if (virtio_has_feature(vi->vdev, guest_offloads[i]))
			set_bit(guest_offloads[i], &vi->guest_offloads);
	vi->guest_offloads_capable = vi->guest_offloads;

	pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
		 dev->name, max_queue_pairs);

	return 0;

free_unregister_netdev:
	unregister_netdev(dev);
free_failover:
	net_failover_destroy(vi->failover);
free_vqs:
	virtio_reset_device(vdev);
	cancel_delayed_work_sync(&vi->refill);
	free_receive_page_frags(vi);
	virtnet_del_vqs(vi);
free:
	free_netdev(dev);
	return err;
}
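
/* Common teardown for remove and freeze: reset the device first so it stops
 * touching our buffers, then drop whatever is still queued and delete the
 * virtqueues.
 */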

static void remove_vq_common(struct virtnet_info *vi)
{
	virtio_reset_device(vi->vdev);

	/* Free unused buffers in both send and recv, if any. */
	free_unused_bufs(vi);

	free_receive_bufs(vi);

	free_receive_page_frags(vi);

	virtnet_del_vqs(vi);
}

static void virtnet_remove(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	virtnet_cpu_notif_remove(vi);

	/* Make sure no work handler is accessing the device. */
	flush_work(&vi->config_work);

	unregister_netdev(vi->dev);

	net_failover_destroy(vi->failover);

	remove_vq_common(vi);

	free_netdev(vi->dev);
}

static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	virtnet_cpu_notif_remove(vi);
	virtnet_freeze_down(vdev);
	remove_vq_common(vi);

	return 0;
}

static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;
	int err;

	err = virtnet_restore_up(vdev);
	if (err)
		return err;
	virtnet_set_queues(vi, vi->curr_queue_pairs);

	err = virtnet_cpu_notif_add(vi);
	if (err) {
		virtnet_freeze_down(vdev);
		remove_vq_common(vi);
		return err;
	}

	return 0;
}

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

#define VIRTNET_FEATURES \
	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
	VIRTIO_NET_F_MAC, \
	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
	VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \
	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
	VIRTIO_NET_F_CTRL_MAC_ADDR, \
	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \
	VIRTIO_NET_F_GUEST_HDRLEN

static unsigned int features[] = {
	VIRTNET_FEATURES,
};

static unsigned int features_legacy[] = {
	VIRTNET_FEATURES,
	VIRTIO_NET_F_GSO,
	VIRTIO_F_ANY_LAYOUT,
};
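
/* The .validate callback runs before feature negotiation is finalized, which
 * is what allows virtnet_validate() to clear feature bits above; .probe then
 * runs with the negotiated features. The legacy feature table appears only
 * for transitional (pre-1.0) devices, which may still offer
 * VIRTIO_NET_F_GSO and VIRTIO_F_ANY_LAYOUT.
 */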

static struct virtio_driver virtio_net_driver = {
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
	.feature_table_legacy = features_legacy,
	.feature_table_size_legacy = ARRAY_SIZE(features_legacy),
	.driver.name =	KBUILD_MODNAME,
	.driver.owner =	THIS_MODULE,
	.id_table =	id_table,
	.validate =	virtnet_validate,
	.probe =	virtnet_probe,
	.remove =	virtnet_remove,
	.config_changed = virtnet_config_changed,
#ifdef CONFIG_PM_SLEEP
	.freeze =	virtnet_freeze,
	.restore =	virtnet_restore,
#endif
};

static __init int virtio_net_driver_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
				      virtnet_cpu_online,
				      virtnet_cpu_down_prep);
	if (ret < 0)
		goto out;
	virtionet_online = ret;
	ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
				      NULL, virtnet_cpu_dead);
	if (ret)
		goto err_dead;
	ret = register_virtio_driver(&virtio_net_driver);
	if (ret)
		goto err_virtio;
	return 0;
err_virtio:
	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
err_dead:
	cpuhp_remove_multi_state(virtionet_online);
out:
	return ret;
}
module_init(virtio_net_driver_init);

static __exit void virtio_net_driver_exit(void)
{
	unregister_virtio_driver(&virtio_net_driver);
	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
	cpuhp_remove_multi_state(virtionet_online);
}
module_exit(virtio_net_driver_exit);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio network driver");
MODULE_LICENSE("GPL");