/* A network driver using virtio.
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/scatterlist.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/average.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <net/route.h>
#include <net/xdp.h>
#include <net/net_failover.h>

static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);

static bool csum = true, gso = true, napi_tx;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);
module_param(napi_tx, bool, 0644);

/* FIXME: MTU in config. */
#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
#define GOOD_COPY_LEN	128

#define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)

/* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
#define VIRTIO_XDP_HEADROOM 256

/* Separating two types of XDP xmit */
#define VIRTIO_XDP_TX		BIT(0)
#define VIRTIO_XDP_REDIR	BIT(1)

#define VIRTIO_XDP_FLAG	BIT(0)

/* RX packet size EWMA. The average packet size is used to determine the packet
 * buffer size when refilling RX rings. As the entire RX ring may be refilled
 * at once, the weight is chosen so that the EWMA will be insensitive to short-
 * term, transient changes in packet size.
 */
DECLARE_EWMA(pkt_len, 0, 64)
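/* Illustrative note (sketch, not driver code): DECLARE_EWMA() above generates
 * struct ewma_pkt_len together with ewma_pkt_len_init/add/read helpers. The
 * driver feeds it one sample per merged packet and reads the average when
 * sizing refill buffers, roughly:
 *
 *	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);	// receive path
 *	len = ewma_pkt_len_read(&rq->mrg_avg_pkt_len);		// refill path
 */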
#define VIRTNET_DRIVER_VERSION "1.0.0"

static const unsigned long guest_offloads[] = {
	VIRTIO_NET_F_GUEST_TSO4,
	VIRTIO_NET_F_GUEST_TSO6,
	VIRTIO_NET_F_GUEST_ECN,
	VIRTIO_NET_F_GUEST_UFO,
	VIRTIO_NET_F_GUEST_CSUM
};

struct virtnet_stat_desc {
	char desc[ETH_GSTRING_LEN];
	size_t offset;
};

struct virtnet_sq_stats {
	struct u64_stats_sync syncp;
	u64 packets;
	u64 bytes;
	u64 xdp_tx;
	u64 xdp_tx_drops;
	u64 kicks;
};

struct virtnet_rq_stats {
	struct u64_stats_sync syncp;
	u64 packets;
	u64 bytes;
	u64 drops;
	u64 xdp_packets;
	u64 xdp_tx;
	u64 xdp_redirects;
	u64 xdp_drops;
	u64 kicks;
};

#define VIRTNET_SQ_STAT(m)	offsetof(struct virtnet_sq_stats, m)
#define VIRTNET_RQ_STAT(m)	offsetof(struct virtnet_rq_stats, m)

static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
	{ "packets",		VIRTNET_SQ_STAT(packets) },
	{ "bytes",		VIRTNET_SQ_STAT(bytes) },
	{ "xdp_tx",		VIRTNET_SQ_STAT(xdp_tx) },
	{ "xdp_tx_drops",	VIRTNET_SQ_STAT(xdp_tx_drops) },
	{ "kicks",		VIRTNET_SQ_STAT(kicks) },
};

static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
	{ "packets",		VIRTNET_RQ_STAT(packets) },
	{ "bytes",		VIRTNET_RQ_STAT(bytes) },
	{ "drops",		VIRTNET_RQ_STAT(drops) },
	{ "xdp_packets",	VIRTNET_RQ_STAT(xdp_packets) },
	{ "xdp_tx",		VIRTNET_RQ_STAT(xdp_tx) },
	{ "xdp_redirects",	VIRTNET_RQ_STAT(xdp_redirects) },
	{ "xdp_drops",		VIRTNET_RQ_STAT(xdp_drops) },
	{ "kicks",		VIRTNET_RQ_STAT(kicks) },
};

#define VIRTNET_SQ_STATS_LEN	ARRAY_SIZE(virtnet_sq_stats_desc)
#define VIRTNET_RQ_STATS_LEN	ARRAY_SIZE(virtnet_rq_stats_desc)

/* Internal representation of a send virtqueue */
struct send_queue {
	/* Virtqueue associated with this send_queue */
	struct virtqueue *vq;

	/* TX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Name of the send queue: output.$index */
	char name[40];

	struct virtnet_sq_stats stats;

	struct napi_struct napi;
};
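/* Illustrative note (sketch, not driver code): virtnet_get_strings() below
 * prefixes each descriptor name with its queue, so with the tables above
 * queue 0 exports ethtool stats such as:
 *
 *	rx_queue_0_packets, rx_queue_0_bytes, ..., rx_queue_0_kicks
 *	tx_queue_0_packets, tx_queue_0_bytes, ..., tx_queue_0_kicks
 */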
/* Internal representation of a receive virtqueue */
struct receive_queue {
	/* Virtqueue associated with this receive_queue */
	struct virtqueue *vq;

	struct napi_struct napi;

	struct bpf_prog __rcu *xdp_prog;

	struct virtnet_rq_stats stats;

	/* Chain pages by the private ptr. */
	struct page *pages;

	/* Average packet length for mergeable receive buffers. */
	struct ewma_pkt_len mrg_avg_pkt_len;

	/* Page frag for packet buffer allocation. */
	struct page_frag alloc_frag;

	/* RX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Min single buffer size for mergeable buffers case. */
	unsigned int min_buf_len;

	/* Name of this receive queue: input.$index */
	char name[40];

	struct xdp_rxq_info xdp_rxq;
};

/* Control VQ buffers: protected by the rtnl lock */
struct control_buf {
	struct virtio_net_ctrl_hdr hdr;
	virtio_net_ctrl_ack status;
	struct virtio_net_ctrl_mq mq;
	u8 promisc;
	u8 allmulti;
	__virtio16 vid;
	__virtio64 offloads;
};

struct virtnet_info {
	struct virtio_device *vdev;
	struct virtqueue *cvq;
	struct net_device *dev;
	struct send_queue *sq;
	struct receive_queue *rq;
	unsigned int status;

	/* Max # of queue pairs supported by the device */
	u16 max_queue_pairs;

	/* # of queue pairs currently used by the driver */
	u16 curr_queue_pairs;

	/* # of XDP queue pairs currently used by the driver */
	u16 xdp_queue_pairs;

	/* I like... big packets and I cannot lie! */
	bool big_packets;

	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

	/* Has control virtqueue */
	bool has_cvq;

	/* Host can handle any s/g split between our header and packet data */
	bool any_header_sg;

	/* Packet virtio header size */
	u8 hdr_len;

	/* Work struct for refilling if we run low on memory. */
	struct delayed_work refill;

	/* Work struct for config space updates */
	struct work_struct config_work;

	/* Is the affinity hint set for the virtqueues? */
	bool affinity_hint_set;

	/* CPU hotplug instances for online & dead */
	struct hlist_node node;
	struct hlist_node node_dead;

	struct control_buf *ctrl;

	/* Ethtool settings */
	u8 duplex;
	u32 speed;

	unsigned long guest_offloads;
	unsigned long guest_offloads_capable;

	/* failover when STANDBY feature enabled */
	struct failover *failover;
};

struct padded_vnet_hdr {
	struct virtio_net_hdr_mrg_rxbuf hdr;
	/*
	 * hdr is in a separate sg buffer, and data sg buffer shares same page
	 * with this header sg. This padding makes next sg 16 byte aligned
	 * after the header.
	 */
	char padding[4];
};

static bool is_xdp_frame(void *ptr)
{
	return (unsigned long)ptr & VIRTIO_XDP_FLAG;
}

static void *xdp_to_ptr(struct xdp_frame *ptr)
{
	return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
}

static struct xdp_frame *ptr_to_xdp(void *ptr)
{
	return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
}

/* Converting between virtqueue no. and kernel tx/rx queue no.
 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
 */
static int vq2txq(struct virtqueue *vq)
{
	return (vq->index - 1) / 2;
}

static int txq2vq(int txq)
{
	return txq * 2 + 1;
}

static int vq2rxq(struct virtqueue *vq)
{
	return vq->index / 2;
}

static int rxq2vq(int rxq)
{
	return rxq * 2;
}
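/* Illustrative example (not part of the driver): with two queue pairs the
 * mapping above gives
 *
 *	rxq2vq(0) == 0, txq2vq(0) == 1,
 *	rxq2vq(1) == 2, txq2vq(1) == 3,
 *
 * and the control virtqueue, when present, takes the next index (4).
 */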
static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)
{
	return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;
}

/*
 * private is used to chain pages for big packets; put the whole
 * most recently used list at the front for reuse
 */
static void give_pages(struct receive_queue *rq, struct page *page)
{
	struct page *end;

	/* Find end of list, sew whole thing into vi->rq.pages. */
	for (end = page; end->private; end = (struct page *)end->private);
	end->private = (unsigned long)rq->pages;
	rq->pages = page;
}

static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
{
	struct page *p = rq->pages;

	if (p) {
		rq->pages = (struct page *)p->private;
		/* clear private here, it is used to chain pages */
		p->private = 0;
	} else
		p = alloc_page(gfp_mask);
	return p;
}

static void virtqueue_napi_schedule(struct napi_struct *napi,
				    struct virtqueue *vq)
{
	if (napi_schedule_prep(napi)) {
		virtqueue_disable_cb(vq);
		__napi_schedule(napi);
	}
}

static void virtqueue_napi_complete(struct napi_struct *napi,
				    struct virtqueue *vq, int processed)
{
	int opaque;

	opaque = virtqueue_enable_cb_prepare(vq);
	if (napi_complete_done(napi, processed)) {
		if (unlikely(virtqueue_poll(vq, opaque)))
			virtqueue_napi_schedule(napi, vq);
	} else {
		virtqueue_disable_cb(vq);
	}
}

static void skb_xmit_done(struct virtqueue *vq)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;

	/* Suppress further interrupts. */
	virtqueue_disable_cb(vq);

	if (napi->weight)
		virtqueue_napi_schedule(napi, vq);
	else
		/* We were probably waiting for more output buffers. */
		netif_wake_subqueue(vi->dev, vq2txq(vq));
}

#define MRG_CTX_HEADER_SHIFT 22
static void *mergeable_len_to_ctx(unsigned int truesize,
				  unsigned int headroom)
{
	return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
}

static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
{
	return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
}

static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
{
	return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
}

/* Called from bottom half context */
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct page *page, unsigned int offset,
				   unsigned int len, unsigned int truesize,
				   bool hdr_valid)
{
	struct sk_buff *skb;
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	unsigned int copy, hdr_len, hdr_padded_len;
	char *p;

	p = page_address(page) + offset;

	/* copy small packet so we can reuse these pages for small data */
	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
	if (unlikely(!skb))
		return NULL;

	hdr = skb_vnet_hdr(skb);

	hdr_len = vi->hdr_len;
	if (vi->mergeable_rx_bufs)
		hdr_padded_len = sizeof(*hdr);
	else
		hdr_padded_len = sizeof(struct padded_vnet_hdr);

	if (hdr_valid)
		memcpy(hdr, p, hdr_len);

	len -= hdr_len;
	offset += hdr_padded_len;
	p += hdr_padded_len;

	copy = len;
	if (copy > skb_tailroom(skb))
		copy = skb_tailroom(skb);
	skb_put_data(skb, p, copy);

	len -= copy;
	offset += copy;

	if (vi->mergeable_rx_bufs) {
		if (len)
			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
		else
			put_page(page);
		return skb;
	}

	/*
	 * Verify that we can indeed put this data into a skb.
	 * This is here to handle cases when the device erroneously
	 * tries to receive more than is possible. This is usually
	 * the case of a broken device.
	 */
	if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
		net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
		dev_kfree_skb(skb);
		return NULL;
	}
	BUG_ON(offset >= PAGE_SIZE);
	while (len) {
		unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
				frag_size, truesize);
		len -= frag_size;
		page = (struct page *)page->private;
		offset = 0;
	}

	if (page)
		give_pages(rq, page);

	return skb;
}

static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
				  struct send_queue *sq,
				  struct xdp_frame *xdpf)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	int err;

	/* The virtqueue wants to use the data area in front of the packet */
	if (unlikely(xdpf->metasize > 0))
		return -EOPNOTSUPP;

	if (unlikely(xdpf->headroom < vi->hdr_len))
		return -EOVERFLOW;

	/* Make room for virtqueue hdr (also change xdpf->headroom?) */
	xdpf->data -= vi->hdr_len;
	/* Zero header and leave csum up to XDP layers */
	hdr = xdpf->data;
	memset(hdr, 0, vi->hdr_len);
	xdpf->len += vi->hdr_len;

	sg_init_one(sq->sg, xdpf->data, xdpf->len);

	err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf),
				   GFP_ATOMIC);
	if (unlikely(err))
		return -ENOSPC; /* Caller handles free/refcnt */

	return 0;
}

static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi)
{
	unsigned int qp;

	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
	return &vi->sq[qp];
}

static int virtnet_xdp_xmit(struct net_device *dev,
			    int n, struct xdp_frame **frames, u32 flags)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct receive_queue *rq = vi->rq;
	struct bpf_prog *xdp_prog;
	struct send_queue *sq;
	unsigned int len;
	int packets = 0;
	int bytes = 0;
	int drops = 0;
	int kicks = 0;
	int ret, err;
	void *ptr;
	int i;

	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
	 * indicates XDP resources have been successfully allocated.
	 */
	xdp_prog = rcu_dereference(rq->xdp_prog);
	if (!xdp_prog)
		return -ENXIO;

	sq = virtnet_xdp_sq(vi);

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
		ret = -EINVAL;
		drops = n;
		goto out;
	}

	/* Free up any pending old buffers before queueing new ones. */
	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		if (likely(is_xdp_frame(ptr))) {
			struct xdp_frame *frame = ptr_to_xdp(ptr);

			bytes += frame->len;
			xdp_return_frame(frame);
		} else {
			struct sk_buff *skb = ptr;

			bytes += skb->len;
			napi_consume_skb(skb, false);
		}
		packets++;
	}

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];

		err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
		if (err) {
			xdp_return_frame_rx_napi(xdpf);
			drops++;
		}
	}
	ret = n - drops;

	if (flags & XDP_XMIT_FLUSH) {
		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
			kicks = 1;
	}
out:
	u64_stats_update_begin(&sq->stats.syncp);
	sq->stats.bytes += bytes;
	sq->stats.packets += packets;
	sq->stats.xdp_tx += n;
	sq->stats.xdp_tx_drops += drops;
	sq->stats.kicks += kicks;
	u64_stats_update_end(&sq->stats.syncp);

	return ret;
}
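/* Illustrative note (sketch, not driver code): TX completions above carry
 * either an sk_buff or an xdp_frame. Both pointers are at least word aligned,
 * so bit 0 of the token is free: xdp_to_ptr() sets VIRTIO_XDP_FLAG and
 * is_xdp_frame()/ptr_to_xdp() recover the type and original pointer, e.g.
 *
 *	void *tok = xdp_to_ptr(xdpf);
 *	if (is_xdp_frame(tok))
 *		xdpf = ptr_to_xdp(tok);	// masks the flag back off
 */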
static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
{
	return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
}

/* We copy the packet for XDP in the following cases:
 *
 * 1) Packet is scattered across multiple rx buffers.
 * 2) Headroom space is insufficient.
 *
 * This is inefficient but it's a temporary condition that
 * we hit right after XDP is enabled and until the queue is refilled
 * with large buffers with sufficient headroom - so it should affect
 * at most queue size packets. Afterwards, the conditions to enable
 * XDP should preclude the underlying device from sending packets
 * across multiple buffers (num_buf > 1), and we make sure buffers
 * have enough headroom.
 */
static struct page *xdp_linearize_page(struct receive_queue *rq,
				       u16 *num_buf,
				       struct page *p,
				       int offset,
				       int page_off,
				       unsigned int *len)
{
	struct page *page = alloc_page(GFP_ATOMIC);

	if (!page)
		return NULL;

	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
	page_off += *len;

	while (--*num_buf) {
		int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
		unsigned int buflen;
		void *buf;
		int off;

		buf = virtqueue_get_buf(rq->vq, &buflen);
		if (unlikely(!buf))
			goto err_buf;

		p = virt_to_head_page(buf);
		off = buf - page_address(p);

		/* guard against a misconfigured or uncooperative backend that
		 * is sending packets larger than the MTU.
		 */
		if ((page_off + buflen + tailroom) > PAGE_SIZE) {
			put_page(p);
			goto err_buf;
		}

		memcpy(page_address(page) + page_off,
		       page_address(p) + off, buflen);
		page_off += buflen;
		put_page(p);
	}

	/* Headroom does not contribute to packet length */
	*len = page_off - VIRTIO_XDP_HEADROOM;
	return page;
err_buf:
	__free_pages(page, 0);
	return NULL;
}

static struct sk_buff *receive_small(struct net_device *dev,
				     struct virtnet_info *vi,
				     struct receive_queue *rq,
				     void *buf, void *ctx,
				     unsigned int len,
				     unsigned int *xdp_xmit,
				     struct virtnet_rq_stats *stats)
{
	struct sk_buff *skb;
	struct bpf_prog *xdp_prog;
	unsigned int xdp_headroom = (unsigned long)ctx;
	unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
	unsigned int headroom = vi->hdr_len + header_offset;
	unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	struct page *page = virt_to_head_page(buf);
	unsigned int delta = 0;
	struct page *xdp_page;
	int err;

	len -= vi->hdr_len;
	stats->bytes += len;

	rcu_read_lock();
	xdp_prog = rcu_dereference(rq->xdp_prog);
	if (xdp_prog) {
		struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
		struct xdp_frame *xdpf;
		struct xdp_buff xdp;
		void *orig_data;
		u32 act;

		if (unlikely(hdr->hdr.gso_type))
			goto err_xdp;

		if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
			int offset = buf - page_address(page) + header_offset;
			unsigned int tlen = len + vi->hdr_len;
			u16 num_buf = 1;

			xdp_headroom = virtnet_get_headroom(vi);
			header_offset = VIRTNET_RX_PAD + xdp_headroom;
			headroom = vi->hdr_len + header_offset;
			buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
				 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
			xdp_page = xdp_linearize_page(rq, &num_buf, page,
						      offset, header_offset,
						      &tlen);
			if (!xdp_page)
				goto err_xdp;

			buf = page_address(xdp_page);
			put_page(page);
			page = xdp_page;
		}

		xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
		xdp.data = xdp.data_hard_start + xdp_headroom;
		xdp_set_data_meta_invalid(&xdp);
		xdp.data_end = xdp.data + len;
		xdp.rxq = &rq->xdp_rxq;
		orig_data = xdp.data;
		act = bpf_prog_run_xdp(xdp_prog, &xdp);
		stats->xdp_packets++;

		switch (act) {
		case XDP_PASS:
			/* Recalculate length in case bpf program changed it */
			delta = orig_data - xdp.data;
			len = xdp.data_end - xdp.data;
			break;
		case XDP_TX:
			stats->xdp_tx++;
			xdpf = convert_to_xdp_frame(&xdp);
			if (unlikely(!xdpf))
				goto err_xdp;
			err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
			if (unlikely(err < 0)) {
				trace_xdp_exception(vi->dev, xdp_prog, act);
				goto err_xdp;
			}
			*xdp_xmit |= VIRTIO_XDP_TX;
			rcu_read_unlock();
			goto xdp_xmit;
		case XDP_REDIRECT:
			stats->xdp_redirects++;
			err = xdp_do_redirect(dev, &xdp, xdp_prog);
			if (err)
				goto err_xdp;
			*xdp_xmit |= VIRTIO_XDP_REDIR;
			rcu_read_unlock();
			goto xdp_xmit;
		default:
			bpf_warn_invalid_xdp_action(act);
			/* fall through */
		case XDP_ABORTED:
			trace_xdp_exception(vi->dev, xdp_prog, act);
			/* fall through */
		case XDP_DROP:
			goto err_xdp;
		}
	}
	rcu_read_unlock();

	skb = build_skb(buf, buflen);
	if (!skb) {
		put_page(page);
		goto err;
	}
	skb_reserve(skb, headroom - delta);
	skb_put(skb, len);
	if (!delta) {
		buf += header_offset;
		memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
	} /* keep zeroed vnet hdr since packet was changed by bpf */

err:
	return skb;

err_xdp:
	rcu_read_unlock();
	stats->xdp_drops++;
	stats->drops++;
	put_page(page);
xdp_xmit:
	return NULL;
}

static struct sk_buff *receive_big(struct net_device *dev,
				   struct virtnet_info *vi,
				   struct receive_queue *rq,
				   void *buf,
				   unsigned int len,
				   struct virtnet_rq_stats *stats)
{
	struct page *page = buf;
	struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
					  PAGE_SIZE, true);

	stats->bytes += len - vi->hdr_len;
	if (unlikely(!skb))
		goto err;

	return skb;

err:
	stats->drops++;
	give_pages(rq, page);
	return NULL;
}

static struct sk_buff *receive_mergeable(struct net_device *dev,
					 struct virtnet_info *vi,
					 struct receive_queue *rq,
					 void *buf,
					 void *ctx,
					 unsigned int len,
					 unsigned int *xdp_xmit,
					 struct virtnet_rq_stats *stats)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
	struct page *page = virt_to_head_page(buf);
	int offset = buf - page_address(page);
	struct sk_buff *head_skb, *curr_skb;
	struct bpf_prog *xdp_prog;
	unsigned int truesize;
	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
	int err;

	head_skb = NULL;
	stats->bytes += len - vi->hdr_len;

	rcu_read_lock();
	xdp_prog = rcu_dereference(rq->xdp_prog);
	if (xdp_prog) {
		struct xdp_frame *xdpf;
		struct page *xdp_page;
		struct xdp_buff xdp;
		void *data;
		u32 act;

		/* Transient failure which in theory could occur if
		 * in-flight packets from before XDP was enabled reach
		 * the receive path after XDP is loaded.
		 */
		if (unlikely(hdr->hdr.gso_type))
			goto err_xdp;

		/* This happens when the rx buffer size is underestimated
		 * or the headroom is not enough because the buffer
		 * was refilled before XDP was set.
		 * This should only happen for the first several packets,
		 * so we don't care much about its performance.
		 */
		if (unlikely(num_buf > 1 ||
			     headroom < virtnet_get_headroom(vi))) {
			/* linearize data for XDP */
			xdp_page = xdp_linearize_page(rq, &num_buf,
						      page, offset,
						      VIRTIO_XDP_HEADROOM,
						      &len);
			if (!xdp_page)
				goto err_xdp;
			offset = VIRTIO_XDP_HEADROOM;
		} else {
			xdp_page = page;
		}

		/* Allow consuming headroom but reserve enough space to push
		 * the descriptor on if we get an XDP_TX return code.
		 */
		data = page_address(xdp_page) + offset;
		xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
		xdp.data = data + vi->hdr_len;
		xdp_set_data_meta_invalid(&xdp);
		xdp.data_end = xdp.data + (len - vi->hdr_len);
		xdp.rxq = &rq->xdp_rxq;

		act = bpf_prog_run_xdp(xdp_prog, &xdp);
		stats->xdp_packets++;

		switch (act) {
		case XDP_PASS:
			/* recalculate offset to account for any header
			 * adjustments. Note other cases do not build an
			 * skb and avoid using offset
			 */
			offset = xdp.data -
					page_address(xdp_page) - vi->hdr_len;

			/* recalculate len if xdp.data or xdp.data_end were
			 * adjusted
			 */
			len = xdp.data_end - xdp.data + vi->hdr_len;
			/* We can only create skb based on xdp_page. */
			if (unlikely(xdp_page != page)) {
				rcu_read_unlock();
				put_page(page);
				head_skb = page_to_skb(vi, rq, xdp_page,
						       offset, len,
						       PAGE_SIZE, false);
				return head_skb;
			}
			break;
		case XDP_TX:
			stats->xdp_tx++;
			xdpf = convert_to_xdp_frame(&xdp);
			if (unlikely(!xdpf))
				goto err_xdp;
			err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
			if (unlikely(err < 0)) {
				trace_xdp_exception(vi->dev, xdp_prog, act);
				if (unlikely(xdp_page != page))
					put_page(xdp_page);
				goto err_xdp;
			}
			*xdp_xmit |= VIRTIO_XDP_TX;
			if (unlikely(xdp_page != page))
				put_page(page);
			rcu_read_unlock();
			goto xdp_xmit;
		case XDP_REDIRECT:
			stats->xdp_redirects++;
			err = xdp_do_redirect(dev, &xdp, xdp_prog);
			if (err) {
				if (unlikely(xdp_page != page))
					put_page(xdp_page);
				goto err_xdp;
			}
			*xdp_xmit |= VIRTIO_XDP_REDIR;
			if (unlikely(xdp_page != page))
				put_page(page);
			rcu_read_unlock();
			goto xdp_xmit;
		default:
			bpf_warn_invalid_xdp_action(act);
			/* fall through */
		case XDP_ABORTED:
			trace_xdp_exception(vi->dev, xdp_prog, act);
			/* fall through */
		case XDP_DROP:
			if (unlikely(xdp_page != page))
				__free_pages(xdp_page, 0);
			goto err_xdp;
		}
	}
	rcu_read_unlock();

	truesize = mergeable_ctx_to_truesize(ctx);
	if (unlikely(len > truesize)) {
		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
			 dev->name, len, (unsigned long)ctx);
		dev->stats.rx_length_errors++;
		goto err_skb;
	}

	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
	curr_skb = head_skb;

	if (unlikely(!curr_skb))
		goto err_skb;
	while (--num_buf) {
		int num_skb_frags;

		buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers out of %d missing\n",
				 dev->name, num_buf,
				 virtio16_to_cpu(vi->vdev,
						 hdr->num_buffers));
			dev->stats.rx_length_errors++;
			goto err_buf;
		}

		stats->bytes += len;
		page = virt_to_head_page(buf);

		truesize = mergeable_ctx_to_truesize(ctx);
		if (unlikely(len > truesize)) {
			pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
				 dev->name, len, (unsigned long)ctx);
			dev->stats.rx_length_errors++;
			goto err_skb;
		}

		num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
		if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
			struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);

			if (unlikely(!nskb))
				goto err_skb;
			if (curr_skb == head_skb)
				skb_shinfo(curr_skb)->frag_list = nskb;
			else
				curr_skb->next = nskb;
			curr_skb = nskb;
			head_skb->truesize += nskb->truesize;
			num_skb_frags = 0;
		}
		if (curr_skb != head_skb) {
			head_skb->data_len += len;
			head_skb->len += len;
			head_skb->truesize += truesize;
		}
		offset = buf - page_address(page);
		if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
			put_page(page);
			skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
					     len, truesize);
		} else {
			skb_add_rx_frag(curr_skb, num_skb_frags, page,
					offset, len, truesize);
		}
	}

	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
	return head_skb;

err_xdp:
	rcu_read_unlock();
	stats->xdp_drops++;
err_skb:
	put_page(page);
	while (num_buf-- > 1) {
		buf = virtqueue_get_buf(rq->vq, &len);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers missing\n",
				 dev->name, num_buf);
			dev->stats.rx_length_errors++;
			break;
		}
		stats->bytes += len;
		page = virt_to_head_page(buf);
		put_page(page);
	}
err_buf:
	stats->drops++;
	dev_kfree_skb(head_skb);
xdp_xmit:
	return NULL;
}

static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
			void *buf, unsigned int len, void **ctx,
			unsigned int *xdp_xmit,
			struct virtnet_rq_stats *stats)
{
	struct net_device *dev = vi->dev;
	struct sk_buff *skb;
	struct virtio_net_hdr_mrg_rxbuf *hdr;

	if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
		pr_debug("%s: short packet %i\n", dev->name, len);
		dev->stats.rx_length_errors++;
		if (vi->mergeable_rx_bufs) {
			put_page(virt_to_head_page(buf));
		} else if (vi->big_packets) {
			give_pages(rq, buf);
		} else {
			put_page(virt_to_head_page(buf));
		}
		return;
	}

	if (vi->mergeable_rx_bufs)
		skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
					stats);
	else if (vi->big_packets)
		skb = receive_big(dev, vi, rq, buf, len, stats);
	else
		skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);

	if (unlikely(!skb))
		return;

	hdr = skb_vnet_hdr(skb);

	if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
				  virtio_is_little_endian(vi->vdev))) {
		net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
				     dev->name, hdr->hdr.gso_type,
				     hdr->hdr.gso_size);
		goto frame_err;
	}

	skb->protocol = eth_type_trans(skb, dev);
	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
		 ntohs(skb->protocol), skb->len, skb->pkt_type);

	napi_gro_receive(&rq->napi, skb);
	return;

frame_err:
	dev->stats.rx_frame_errors++;
	dev_kfree_skb(skb);
}
/* Unlike mergeable buffers, all buffers are allocated to the
 * same size, except for the headroom. For this reason we do
 * not need to use mergeable_len_to_ctx here - it is enough
 * to store the headroom as the context ignoring the truesize.
 */
static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
			     gfp_t gfp)
{
	struct page_frag *alloc_frag = &rq->alloc_frag;
	char *buf;
	unsigned int xdp_headroom = virtnet_get_headroom(vi);
	void *ctx = (void *)(unsigned long)xdp_headroom;
	int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
	int err;

	len = SKB_DATA_ALIGN(len) +
	      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
		return -ENOMEM;

	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
	get_page(alloc_frag->page);
	alloc_frag->offset += len;
	sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom,
		    vi->hdr_len + GOOD_PACKET_LEN);
	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
	if (err < 0)
		put_page(virt_to_head_page(buf));
	return err;
}

static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
			   gfp_t gfp)
{
	struct page *first, *list = NULL;
	char *p;
	int i, err, offset;

	sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);

	/* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
	for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
		first = get_a_page(rq, gfp);
		if (!first) {
			if (list)
				give_pages(rq, list);
			return -ENOMEM;
		}
		sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);

		/* chain new page in list head to match sg */
		first->private = (unsigned long)list;
		list = first;
	}

	first = get_a_page(rq, gfp);
	if (!first) {
		give_pages(rq, list);
		return -ENOMEM;
	}
	p = page_address(first);

	/* rq->sg[0], rq->sg[1] share the same page */
	/* a separated rq->sg[0] for header - required in case !any_header_sg */
	sg_set_buf(&rq->sg[0], p, vi->hdr_len);

	/* rq->sg[1] for data packet, from offset */
	offset = sizeof(struct padded_vnet_hdr);
	sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);

	/* chain first in list head */
	first->private = (unsigned long)list;
	err = virtqueue_add_inbuf(rq->vq, rq->sg, MAX_SKB_FRAGS + 2,
				  first, gfp);
	if (err < 0)
		give_pages(rq, first);

	return err;
}

static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
					  struct ewma_pkt_len *avg_pkt_len,
					  unsigned int room)
{
	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	unsigned int len;

	if (room)
		return PAGE_SIZE - room;

	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
				rq->min_buf_len, PAGE_SIZE - hdr_len);

	return ALIGN(len, L1_CACHE_BYTES);
}
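/* Illustrative example (not part of the driver): the mergeable-buffer context
 * defined earlier packs the headroom above bit MRG_CTX_HEADER_SHIFT (22) and
 * the truesize in the low 22 bits, e.g. for truesize 1536 and headroom 256:
 *
 *	ctx = mergeable_len_to_ctx(1536, 256);		// 0x40000600
 *	mergeable_ctx_to_truesize(ctx);			// 1536
 *	mergeable_ctx_to_headroom(ctx);			// 256
 */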
static int add_recvbuf_mergeable(struct virtnet_info *vi,
				 struct receive_queue *rq, gfp_t gfp)
{
	struct page_frag *alloc_frag = &rq->alloc_frag;
	unsigned int headroom = virtnet_get_headroom(vi);
	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
	char *buf;
	void *ctx;
	int err;
	unsigned int len, hole;

	/* Extra tailroom is needed to satisfy XDP's assumption. This
	 * means rx frags coalescing won't work, but since we have
	 * disabled GSO for XDP, it won't be a big issue.
	 */
	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
	if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
		return -ENOMEM;

	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
	buf += headroom; /* advance address leaving hole at front of pkt */
	get_page(alloc_frag->page);
	alloc_frag->offset += len + room;
	hole = alloc_frag->size - alloc_frag->offset;
	if (hole < len + room) {
		/* To avoid internal fragmentation, if there is very likely not
		 * enough space for another buffer, add the remaining space to
		 * the current buffer.
		 */
		len += hole;
		alloc_frag->offset += hole;
	}

	sg_init_one(rq->sg, buf, len);
	ctx = mergeable_len_to_ctx(len, headroom);
	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
	if (err < 0)
		put_page(virt_to_head_page(buf));

	return err;
}

/*
 * Returns false if we couldn't fill entirely (OOM).
 *
 * Normally run in the receive path, but can also be run from ndo_open
 * before we're receiving packets, or from refill_work which is
 * careful to disable receiving (using napi_disable).
 */
static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
			  gfp_t gfp)
{
	int err;
	bool oom;

	do {
		if (vi->mergeable_rx_bufs)
			err = add_recvbuf_mergeable(vi, rq, gfp);
		else if (vi->big_packets)
			err = add_recvbuf_big(vi, rq, gfp);
		else
			err = add_recvbuf_small(vi, rq, gfp);

		oom = err == -ENOMEM;
		if (err)
			break;
	} while (rq->vq->num_free);
	if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
		u64_stats_update_begin(&rq->stats.syncp);
		rq->stats.kicks++;
		u64_stats_update_end(&rq->stats.syncp);
	}

	return !oom;
}

static void skb_recv_done(struct virtqueue *rvq)
{
	struct virtnet_info *vi = rvq->vdev->priv;
	struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];

	virtqueue_napi_schedule(&rq->napi, rvq);
}

static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
{
	napi_enable(napi);

	/* If all buffers were filled by the other side before we napi_enabled,
	 * we won't get another interrupt, so process any outstanding packets
	 * now. Call local_bh_enable after to trigger softIRQ processing.
	 */
	local_bh_disable();
	virtqueue_napi_schedule(napi, vq);
	local_bh_enable();
}

static void virtnet_napi_tx_enable(struct virtnet_info *vi,
				   struct virtqueue *vq,
				   struct napi_struct *napi)
{
	if (!napi->weight)
		return;

	/* Tx napi touches cachelines on the cpu handling tx interrupts. Only
	 * enable the feature if this is likely affine with the transmit path.
	 */
	if (!vi->affinity_hint_set) {
		napi->weight = 0;
		return;
	}

	return virtnet_napi_enable(vq, napi);
}

static void virtnet_napi_tx_disable(struct napi_struct *napi)
{
	if (napi->weight)
		napi_disable(napi);
}

static void refill_work(struct work_struct *work)
{
	struct virtnet_info *vi =
		container_of(work, struct virtnet_info, refill.work);
	bool still_empty;
	int i;

	for (i = 0; i < vi->curr_queue_pairs; i++) {
		struct receive_queue *rq = &vi->rq[i];

		napi_disable(&rq->napi);
		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
		virtnet_napi_enable(rq->vq, &rq->napi);

		/* In theory, this can happen: if we don't get any buffers in
		 * we will *never* try to fill again.
		 */
		if (still_empty)
			schedule_delayed_work(&vi->refill, HZ/2);
	}
}

static int virtnet_receive(struct receive_queue *rq, int budget,
			   unsigned int *xdp_xmit)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct virtnet_rq_stats stats = {};
	unsigned int len;
	void *buf;
	int i;

	if (!vi->big_packets || vi->mergeable_rx_bufs) {
		void *ctx;

		while (stats.packets < budget &&
		       (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
			receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats);
			stats.packets++;
		}
	} else {
		while (stats.packets < budget &&
		       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
			receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats);
			stats.packets++;
		}
	}

	if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
		if (!try_fill_recv(vi, rq, GFP_ATOMIC))
			schedule_delayed_work(&vi->refill, 0);
	}

	u64_stats_update_begin(&rq->stats.syncp);
	for (i = 0; i < VIRTNET_RQ_STATS_LEN; i++) {
		size_t offset = virtnet_rq_stats_desc[i].offset;
		u64 *item;

		item = (u64 *)((u8 *)&rq->stats + offset);
		*item += *(u64 *)((u8 *)&stats + offset);
	}
	u64_stats_update_end(&rq->stats.syncp);

	return stats.packets;
}

static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
{
	unsigned int len;
	unsigned int packets = 0;
	unsigned int bytes = 0;
	void *ptr;

	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		if (likely(!is_xdp_frame(ptr))) {
			struct sk_buff *skb = ptr;

			pr_debug("Sent skb %p\n", skb);

			bytes += skb->len;
			napi_consume_skb(skb, in_napi);
		} else {
			struct xdp_frame *frame = ptr_to_xdp(ptr);

			bytes += frame->len;
			xdp_return_frame(frame);
		}
		packets++;
	}

	/* Avoid overhead when no packets have been processed; this
	 * happens when called speculatively from start_xmit.
	 */
	if (!packets)
		return;

	u64_stats_update_begin(&sq->stats.syncp);
	sq->stats.bytes += bytes;
	sq->stats.packets += packets;
	u64_stats_update_end(&sq->stats.syncp);
}

static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
{
	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
		return false;
	else if (q < vi->curr_queue_pairs)
		return true;
	else
		return false;
}

static void virtnet_poll_cleantx(struct receive_queue *rq)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	unsigned int index = vq2rxq(rq->vq);
	struct send_queue *sq = &vi->sq[index];
	struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);

	if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
		return;

	if (__netif_tx_trylock(txq)) {
		free_old_xmit_skbs(sq, true);
		__netif_tx_unlock(txq);
	}

	if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
		netif_tx_wake_queue(txq);
}

static int virtnet_poll(struct napi_struct *napi, int budget)
{
	struct receive_queue *rq =
		container_of(napi, struct receive_queue, napi);
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct send_queue *sq;
	unsigned int received;
	unsigned int xdp_xmit = 0;

	virtnet_poll_cleantx(rq);

	received = virtnet_receive(rq, budget, &xdp_xmit);

	/* Out of packets? */
	if (received < budget)
		virtqueue_napi_complete(napi, rq->vq, received);

	if (xdp_xmit & VIRTIO_XDP_REDIR)
		xdp_do_flush_map();

	if (xdp_xmit & VIRTIO_XDP_TX) {
		sq = virtnet_xdp_sq(vi);
		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
			u64_stats_update_begin(&sq->stats.syncp);
			sq->stats.kicks++;
			u64_stats_update_end(&sq->stats.syncp);
		}
	}

	return received;
}
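/* Illustrative example (not part of the driver): when XDP is attached, the
 * last xdp_queue_pairs send queues are reserved for XDP transmissions. With
 * curr_queue_pairs == 4 and xdp_queue_pairs == 2, is_xdp_raw_buffer_queue()
 * reports queues 0-1 as normal and 2-3 as XDP-only, and virtnet_xdp_sq()
 * picks one of the latter based on the current CPU.
 */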
static int virtnet_open(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int i, err;

	for (i = 0; i < vi->max_queue_pairs; i++) {
		if (i < vi->curr_queue_pairs)
			/* Make sure we have some buffers: if oom use wq. */
			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
				schedule_delayed_work(&vi->refill, 0);

		err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i);
		if (err < 0)
			return err;

		err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq,
						 MEM_TYPE_PAGE_SHARED, NULL);
		if (err < 0) {
			xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
			return err;
		}

		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
		virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
	}

	return 0;
}

static int virtnet_poll_tx(struct napi_struct *napi, int budget)
{
	struct send_queue *sq = container_of(napi, struct send_queue, napi);
	struct virtnet_info *vi = sq->vq->vdev->priv;
	unsigned int index = vq2txq(sq->vq);
	struct netdev_queue *txq;

	if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
		/* We don't need to enable cb for XDP */
		napi_complete_done(napi, 0);
		return 0;
	}

	txq = netdev_get_tx_queue(vi->dev, index);
	__netif_tx_lock(txq, raw_smp_processor_id());
	free_old_xmit_skbs(sq, true);
	__netif_tx_unlock(txq);

	virtqueue_napi_complete(napi, sq->vq, 0);

	if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
		netif_tx_wake_queue(txq);

	return 0;
}

static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
	struct virtnet_info *vi = sq->vq->vdev->priv;
	int num_sg;
	unsigned hdr_len = vi->hdr_len;
	bool can_push;

	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);

	can_push = vi->any_header_sg &&
		!((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
		!skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
	/* Even if we can, don't push here yet as this would skew
	 * csum_start offset below. */
	if (can_push)
		hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
	else
		hdr = skb_vnet_hdr(skb);

	if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
				    virtio_is_little_endian(vi->vdev), false,
				    0))
		BUG();

	if (vi->mergeable_rx_bufs)
		hdr->num_buffers = 0;

	sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
	if (can_push) {
		__skb_push(skb, hdr_len);
		num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
		if (unlikely(num_sg < 0))
			return num_sg;
		/* Pull header back to avoid skew in tx bytes calculations. */
		__skb_pull(skb, hdr_len);
	} else {
		sg_set_buf(sq->sg, hdr, hdr_len);
		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
		if (unlikely(num_sg < 0))
			return num_sg;
		num_sg++;
	}
	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
}

static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int qnum = skb_get_queue_mapping(skb);
	struct send_queue *sq = &vi->sq[qnum];
	int err;
	struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
	bool kick = !skb->xmit_more;
	bool use_napi = sq->napi.weight;

	/* Free up any pending old buffers before queueing new ones. */
	free_old_xmit_skbs(sq, false);

	if (use_napi && kick)
		virtqueue_enable_cb_delayed(sq->vq);

	/* timestamp packet in software */
	skb_tx_timestamp(skb);

	/* Try to transmit */
	err = xmit_skb(sq, skb);

	/* This should not happen! */
	if (unlikely(err)) {
		dev->stats.tx_fifo_errors++;
		if (net_ratelimit())
			dev_warn(&dev->dev,
				 "Unexpected TXQ (%d) queue failure: %d\n",
				 qnum, err);
		dev->stats.tx_dropped++;
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	/* Don't wait up for transmitted skbs to be freed. */
	if (!use_napi) {
		skb_orphan(skb);
		nf_reset(skb);
	}

	/* If running out of space, stop queue to avoid getting packets that we
	 * are then unable to transmit.
	 * An alternative would be to force queuing layer to requeue the skb by
	 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
	 * returned in a normal path of operation: it means that driver is not
	 * maintaining the TX queue stop/start state properly, and causes
	 * the stack to do a non-trivial amount of useless work.
	 * Since most packets only take 1 or 2 ring slots, stopping the queue
	 * early means 16 slots are typically wasted.
	 */
	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
		netif_stop_subqueue(dev, qnum);
		if (!use_napi &&
		    unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
			/* More just got used, free them then recheck. */
			free_old_xmit_skbs(sq, false);
			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
				netif_start_subqueue(dev, qnum);
				virtqueue_disable_cb(sq->vq);
			}
		}
	}

	if (kick || netif_xmit_stopped(txq)) {
		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
			u64_stats_update_begin(&sq->stats.syncp);
			sq->stats.kicks++;
			u64_stats_update_end(&sq->stats.syncp);
		}
	}

	return NETDEV_TX_OK;
}

/*
 * Send command via the control virtqueue and check status.  Commands
 * supported by the hypervisor, as indicated by feature bits, should
 * never fail unless improperly formatted.
 */
static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
				 struct scatterlist *out)
{
	struct scatterlist *sgs[4], hdr, stat;
	unsigned out_num = 0, tmp;

	/* Caller should know better */
	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));

	vi->ctrl->status = ~0;
	vi->ctrl->hdr.class = class;
	vi->ctrl->hdr.cmd = cmd;
	/* Add header */
	sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
	sgs[out_num++] = &hdr;

	if (out)
		sgs[out_num++] = out;

	/* Add return status. */
	sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
	sgs[out_num] = &stat;

	BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
	virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);

	if (unlikely(!virtqueue_kick(vi->cvq)))
		return vi->ctrl->status == VIRTIO_NET_OK;

	/* Spin for a response, the kick causes an ioport write, trapping
	 * into the hypervisor, so the request should be handled immediately.
	 */
	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
	       !virtqueue_is_broken(vi->cvq))
		cpu_relax();

	return vi->ctrl->status == VIRTIO_NET_OK;
}

static int virtnet_set_mac_address(struct net_device *dev, void *p)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtio_device *vdev = vi->vdev;
	int ret;
	struct sockaddr *addr;
	struct scatterlist sg;

	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
		return -EOPNOTSUPP;

	addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);
	if (!addr)
		return -ENOMEM;

	ret = eth_prepare_mac_addr_change(dev, addr);
	if (ret)
		goto out;

	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
		sg_init_one(&sg, addr->sa_data, dev->addr_len);
		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
					  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
			dev_warn(&vdev->dev,
				 "Failed to set mac address by vq command.\n");
			ret = -EINVAL;
			goto out;
		}
	} else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
		   !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
		unsigned int i;

		/* Naturally, this has an atomicity problem. */
		for (i = 0; i < dev->addr_len; i++)
			virtio_cwrite8(vdev,
				       offsetof(struct virtio_net_config, mac) +
				       i, addr->sa_data[i]);
	}

	eth_commit_mac_addr_change(dev, p);
	ret = 0;

out:
	kfree(addr);
	return ret;
}

static void virtnet_stats(struct net_device *dev,
			  struct rtnl_link_stats64 *tot)
{
	struct virtnet_info *vi = netdev_priv(dev);
	unsigned int start;
	int i;

	for (i = 0; i < vi->max_queue_pairs; i++) {
		u64 tpackets, tbytes, rpackets, rbytes, rdrops;
		struct receive_queue *rq = &vi->rq[i];
		struct send_queue *sq = &vi->sq[i];

		do {
			start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
			tpackets = sq->stats.packets;
			tbytes   = sq->stats.bytes;
		} while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));

		do {
			start = u64_stats_fetch_begin_irq(&rq->stats.syncp);
			rpackets = rq->stats.packets;
			rbytes   = rq->stats.bytes;
			rdrops   = rq->stats.drops;
		} while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start));

		tot->rx_packets += rpackets;
		tot->tx_packets += tpackets;
		tot->rx_bytes   += rbytes;
		tot->tx_bytes   += tbytes;
		tot->rx_dropped += rdrops;
	}

	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_frame_errors = dev->stats.rx_frame_errors;
}

static void virtnet_ack_link_announce(struct virtnet_info *vi)
{
	rtnl_lock();
	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
				  VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
		dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
	rtnl_unlock();
}
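/* Illustrative note (sketch, not driver code): every control-queue request
 * built by virtnet_send_command() above is a small scatterlist chain of
 *
 *	[ ctrl header (class, cmd) ] -> [ optional command payload ] -> [ ack ]
 *
 * where only the ack entry is device-writable; callers such as
 * virtnet_ack_link_announce() pass NULL when no payload is needed.
 */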
static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
{
	struct scatterlist sg;
	struct net_device *dev = vi->dev;

	if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
		return 0;

	vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
	sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
				  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
		dev_warn(&dev->dev, "Failed to set num of queue pairs to %d\n",
			 queue_pairs);
		return -EINVAL;
	} else {
		vi->curr_queue_pairs = queue_pairs;
		/* virtnet_open() will refill when the device goes up. */
		if (dev->flags & IFF_UP)
			schedule_delayed_work(&vi->refill, 0);
	}

	return 0;
}

static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
{
	int err;

	rtnl_lock();
	err = _virtnet_set_queues(vi, queue_pairs);
	rtnl_unlock();
	return err;
}

static int virtnet_close(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int i;

	/* Make sure refill_work doesn't re-enable napi! */
	cancel_delayed_work_sync(&vi->refill);

	for (i = 0; i < vi->max_queue_pairs; i++) {
		xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
		napi_disable(&vi->rq[i].napi);
		virtnet_napi_tx_disable(&vi->sq[i].napi);
	}

	return 0;
}

static void virtnet_set_rx_mode(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct scatterlist sg[2];
	struct virtio_net_ctrl_mac *mac_data;
	struct netdev_hw_addr *ha;
	int uc_count;
	int mc_count;
	void *buf;
	int i;

	/* We can't dynamically set ndo_set_rx_mode, so return gracefully */
	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
		return;

	vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
	vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);

	sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
				  VIRTIO_NET_CTRL_RX_PROMISC, sg))
		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
			 vi->ctrl->promisc ? "en" : "dis");

	sg_init_one(sg, &vi->ctrl->allmulti, sizeof(vi->ctrl->allmulti));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
				  VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
			 vi->ctrl->allmulti ?
"en" : "dis"); 1858 1859 uc_count = netdev_uc_count(dev); 1860 mc_count = netdev_mc_count(dev); 1861 /* MAC filter - use one buffer for both lists */ 1862 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 1863 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 1864 mac_data = buf; 1865 if (!buf) 1866 return; 1867 1868 sg_init_table(sg, 2); 1869 1870 /* Store the unicast list and count in the front of the buffer */ 1871 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 1872 i = 0; 1873 netdev_for_each_uc_addr(ha, dev) 1874 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 1875 1876 sg_set_buf(&sg[0], mac_data, 1877 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 1878 1879 /* multicast list and count fill the end */ 1880 mac_data = (void *)&mac_data->macs[uc_count][0]; 1881 1882 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 1883 i = 0; 1884 netdev_for_each_mc_addr(ha, dev) 1885 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 1886 1887 sg_set_buf(&sg[1], mac_data, 1888 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 1889 1890 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 1891 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 1892 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 1893 1894 kfree(buf); 1895 } 1896 1897 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 1898 __be16 proto, u16 vid) 1899 { 1900 struct virtnet_info *vi = netdev_priv(dev); 1901 struct scatterlist sg; 1902 1903 vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid); 1904 sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); 1905 1906 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 1907 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 1908 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 1909 return 0; 1910 } 1911 1912 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 1913 __be16 proto, u16 vid) 1914 { 1915 struct virtnet_info *vi = netdev_priv(dev); 1916 struct scatterlist sg; 1917 1918 vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid); 1919 sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); 1920 1921 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 1922 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 1923 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); 1924 return 0; 1925 } 1926 1927 static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu) 1928 { 1929 int i; 1930 1931 if (vi->affinity_hint_set) { 1932 for (i = 0; i < vi->max_queue_pairs; i++) { 1933 virtqueue_set_affinity(vi->rq[i].vq, NULL); 1934 virtqueue_set_affinity(vi->sq[i].vq, NULL); 1935 } 1936 1937 vi->affinity_hint_set = false; 1938 } 1939 } 1940 1941 static void virtnet_set_affinity(struct virtnet_info *vi) 1942 { 1943 cpumask_var_t mask; 1944 int stragglers; 1945 int group_size; 1946 int i, j, cpu; 1947 int num_cpu; 1948 int stride; 1949 1950 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 1951 virtnet_clean_affinity(vi, -1); 1952 return; 1953 } 1954 1955 num_cpu = num_online_cpus(); 1956 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); 1957 stragglers = num_cpu >= vi->curr_queue_pairs ? 1958 num_cpu % vi->curr_queue_pairs : 1959 0; 1960 cpu = cpumask_next(-1, cpu_online_mask); 1961 1962 for (i = 0; i < vi->curr_queue_pairs; i++) { 1963 group_size = stride + (i < stragglers ? 
					1 : 0);

		for (j = 0; j < group_size; j++) {
			cpumask_set_cpu(cpu, mask);
			cpu = cpumask_next_wrap(cpu, cpu_online_mask,
						nr_cpu_ids, false);
		}
		virtqueue_set_affinity(vi->rq[i].vq, mask);
		virtqueue_set_affinity(vi->sq[i].vq, mask);
		__netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, false);
		cpumask_clear(mask);
	}

	vi->affinity_hint_set = true;
	free_cpumask_var(mask);
}

static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
						   node);
	virtnet_set_affinity(vi);
	return 0;
}

static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
{
	struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
						   node_dead);
	virtnet_set_affinity(vi);
	return 0;
}

static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
{
	struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
						   node);

	virtnet_clean_affinity(vi, cpu);
	return 0;
}

static enum cpuhp_state virtionet_online;

static int virtnet_cpu_notif_add(struct virtnet_info *vi)
{
	int ret;

	ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node);
	if (ret)
		return ret;
	ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD,
					       &vi->node_dead);
	if (!ret)
		return ret;
	cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
	return ret;
}

static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
{
	cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
	cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD,
					    &vi->node_dead);
}

static void virtnet_get_ringparam(struct net_device *dev,
				  struct ethtool_ringparam *ring)
{
	struct virtnet_info *vi = netdev_priv(dev);

	ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
	ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
	ring->rx_pending = ring->rx_max_pending;
	ring->tx_pending = ring->tx_max_pending;
}

static void virtnet_get_drvinfo(struct net_device *dev,
				struct ethtool_drvinfo *info)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtio_device *vdev = vi->vdev;

	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
	strlcpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
	strlcpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
}

/* TODO: Eliminate OOO packets during switching */
static int virtnet_set_channels(struct net_device *dev,
				struct ethtool_channels *channels)
{
	struct virtnet_info *vi = netdev_priv(dev);
	u16 queue_pairs = channels->combined_count;
	int err;

	/* We don't support separate rx/tx channels.
	 * We don't allow setting 'other' channels.
	 */
	if (channels->rx_count || channels->tx_count || channels->other_count)
		return -EINVAL;

	if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
		return -EINVAL;

	/* For now we don't support modifying channels while XDP is loaded.
	 * Also, when XDP is loaded all RX queues have XDP programs, so we
	 * only need to check a single RX queue.
2073 */ 2074 if (vi->rq[0].xdp_prog) 2075 return -EINVAL; 2076 2077 get_online_cpus(); 2078 err = _virtnet_set_queues(vi, queue_pairs); 2079 if (!err) { 2080 netif_set_real_num_tx_queues(dev, queue_pairs); 2081 netif_set_real_num_rx_queues(dev, queue_pairs); 2082 2083 virtnet_set_affinity(vi); 2084 } 2085 put_online_cpus(); 2086 2087 return err; 2088 } 2089 2090 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 2091 { 2092 struct virtnet_info *vi = netdev_priv(dev); 2093 char *p = (char *)data; 2094 unsigned int i, j; 2095 2096 switch (stringset) { 2097 case ETH_SS_STATS: 2098 for (i = 0; i < vi->curr_queue_pairs; i++) { 2099 for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) { 2100 snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_%s", 2101 i, virtnet_rq_stats_desc[j].desc); 2102 p += ETH_GSTRING_LEN; 2103 } 2104 } 2105 2106 for (i = 0; i < vi->curr_queue_pairs; i++) { 2107 for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) { 2108 snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_%s", 2109 i, virtnet_sq_stats_desc[j].desc); 2110 p += ETH_GSTRING_LEN; 2111 } 2112 } 2113 break; 2114 } 2115 } 2116 2117 static int virtnet_get_sset_count(struct net_device *dev, int sset) 2118 { 2119 struct virtnet_info *vi = netdev_priv(dev); 2120 2121 switch (sset) { 2122 case ETH_SS_STATS: 2123 return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN + 2124 VIRTNET_SQ_STATS_LEN); 2125 default: 2126 return -EOPNOTSUPP; 2127 } 2128 } 2129 2130 static void virtnet_get_ethtool_stats(struct net_device *dev, 2131 struct ethtool_stats *stats, u64 *data) 2132 { 2133 struct virtnet_info *vi = netdev_priv(dev); 2134 unsigned int idx = 0, start, i, j; 2135 const u8 *stats_base; 2136 size_t offset; 2137 2138 for (i = 0; i < vi->curr_queue_pairs; i++) { 2139 struct receive_queue *rq = &vi->rq[i]; 2140 2141 stats_base = (u8 *)&rq->stats; 2142 do { 2143 start = u64_stats_fetch_begin_irq(&rq->stats.syncp); 2144 for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) { 2145 offset = virtnet_rq_stats_desc[j].offset; 2146 data[idx + j] = *(u64 *)(stats_base + offset); 2147 } 2148 } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start)); 2149 idx += VIRTNET_RQ_STATS_LEN; 2150 } 2151 2152 for (i = 0; i < vi->curr_queue_pairs; i++) { 2153 struct send_queue *sq = &vi->sq[i]; 2154 2155 stats_base = (u8 *)&sq->stats; 2156 do { 2157 start = u64_stats_fetch_begin_irq(&sq->stats.syncp); 2158 for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) { 2159 offset = virtnet_sq_stats_desc[j].offset; 2160 data[idx + j] = *(u64 *)(stats_base + offset); 2161 } 2162 } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start)); 2163 idx += VIRTNET_SQ_STATS_LEN; 2164 } 2165 } 2166 2167 static void virtnet_get_channels(struct net_device *dev, 2168 struct ethtool_channels *channels) 2169 { 2170 struct virtnet_info *vi = netdev_priv(dev); 2171 2172 channels->combined_count = vi->curr_queue_pairs; 2173 channels->max_combined = vi->max_queue_pairs; 2174 channels->max_other = 0; 2175 channels->rx_count = 0; 2176 channels->tx_count = 0; 2177 channels->other_count = 0; 2178 } 2179 2180 /* Check if the user is trying to change anything besides speed/duplex */ 2181 static bool 2182 virtnet_validate_ethtool_cmd(const struct ethtool_link_ksettings *cmd) 2183 { 2184 struct ethtool_link_ksettings diff1 = *cmd; 2185 struct ethtool_link_ksettings diff2 = {}; 2186 2187 /* cmd is always set so we need to clear it, validate the port type 2188 * and also without autonegotiation we can ignore advertising 2189 */ 2190 diff1.base.speed = 0; 2191 diff2.base.port = PORT_OTHER; 2192 
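	/* diff1 ends up holding only the fields we refuse to change, and
	 * diff2 holds their expected values (all zero apart from the
	 * PORT_OTHER we report), so the checks below reject any request
	 * that touches something else.
	 */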
ethtool_link_ksettings_zero_link_mode(&diff1, advertising); 2193 diff1.base.duplex = 0; 2194 diff1.base.cmd = 0; 2195 diff1.base.link_mode_masks_nwords = 0; 2196 2197 return !memcmp(&diff1.base, &diff2.base, sizeof(diff1.base)) && 2198 bitmap_empty(diff1.link_modes.supported, 2199 __ETHTOOL_LINK_MODE_MASK_NBITS) && 2200 bitmap_empty(diff1.link_modes.advertising, 2201 __ETHTOOL_LINK_MODE_MASK_NBITS) && 2202 bitmap_empty(diff1.link_modes.lp_advertising, 2203 __ETHTOOL_LINK_MODE_MASK_NBITS); 2204 } 2205 2206 static int virtnet_set_link_ksettings(struct net_device *dev, 2207 const struct ethtool_link_ksettings *cmd) 2208 { 2209 struct virtnet_info *vi = netdev_priv(dev); 2210 u32 speed; 2211 2212 speed = cmd->base.speed; 2213 /* don't allow custom speed and duplex */ 2214 if (!ethtool_validate_speed(speed) || 2215 !ethtool_validate_duplex(cmd->base.duplex) || 2216 !virtnet_validate_ethtool_cmd(cmd)) 2217 return -EINVAL; 2218 vi->speed = speed; 2219 vi->duplex = cmd->base.duplex; 2220 2221 return 0; 2222 } 2223 2224 static int virtnet_get_link_ksettings(struct net_device *dev, 2225 struct ethtool_link_ksettings *cmd) 2226 { 2227 struct virtnet_info *vi = netdev_priv(dev); 2228 2229 cmd->base.speed = vi->speed; 2230 cmd->base.duplex = vi->duplex; 2231 cmd->base.port = PORT_OTHER; 2232 2233 return 0; 2234 } 2235 2236 static int virtnet_set_coalesce(struct net_device *dev, 2237 struct ethtool_coalesce *ec) 2238 { 2239 struct ethtool_coalesce ec_default = { 2240 .cmd = ETHTOOL_SCOALESCE, 2241 .rx_max_coalesced_frames = 1, 2242 }; 2243 struct virtnet_info *vi = netdev_priv(dev); 2244 int i, napi_weight; 2245 2246 if (ec->tx_max_coalesced_frames > 1) 2247 return -EINVAL; 2248 2249 ec_default.tx_max_coalesced_frames = ec->tx_max_coalesced_frames; 2250 napi_weight = ec->tx_max_coalesced_frames ? 
NAPI_POLL_WEIGHT : 0; 2251 2252 /* disallow changes to fields not explicitly tested above */ 2253 if (memcmp(ec, &ec_default, sizeof(ec_default))) 2254 return -EINVAL; 2255 2256 if (napi_weight ^ vi->sq[0].napi.weight) { 2257 if (dev->flags & IFF_UP) 2258 return -EBUSY; 2259 for (i = 0; i < vi->max_queue_pairs; i++) 2260 vi->sq[i].napi.weight = napi_weight; 2261 } 2262 2263 return 0; 2264 } 2265 2266 static int virtnet_get_coalesce(struct net_device *dev, 2267 struct ethtool_coalesce *ec) 2268 { 2269 struct ethtool_coalesce ec_default = { 2270 .cmd = ETHTOOL_GCOALESCE, 2271 .rx_max_coalesced_frames = 1, 2272 }; 2273 struct virtnet_info *vi = netdev_priv(dev); 2274 2275 memcpy(ec, &ec_default, sizeof(ec_default)); 2276 2277 if (vi->sq[0].napi.weight) 2278 ec->tx_max_coalesced_frames = 1; 2279 2280 return 0; 2281 } 2282 2283 static void virtnet_init_settings(struct net_device *dev) 2284 { 2285 struct virtnet_info *vi = netdev_priv(dev); 2286 2287 vi->speed = SPEED_UNKNOWN; 2288 vi->duplex = DUPLEX_UNKNOWN; 2289 } 2290 2291 static void virtnet_update_settings(struct virtnet_info *vi) 2292 { 2293 u32 speed; 2294 u8 duplex; 2295 2296 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 2297 return; 2298 2299 speed = virtio_cread32(vi->vdev, offsetof(struct virtio_net_config, 2300 speed)); 2301 if (ethtool_validate_speed(speed)) 2302 vi->speed = speed; 2303 duplex = virtio_cread8(vi->vdev, offsetof(struct virtio_net_config, 2304 duplex)); 2305 if (ethtool_validate_duplex(duplex)) 2306 vi->duplex = duplex; 2307 } 2308 2309 static const struct ethtool_ops virtnet_ethtool_ops = { 2310 .get_drvinfo = virtnet_get_drvinfo, 2311 .get_link = ethtool_op_get_link, 2312 .get_ringparam = virtnet_get_ringparam, 2313 .get_strings = virtnet_get_strings, 2314 .get_sset_count = virtnet_get_sset_count, 2315 .get_ethtool_stats = virtnet_get_ethtool_stats, 2316 .set_channels = virtnet_set_channels, 2317 .get_channels = virtnet_get_channels, 2318 .get_ts_info = ethtool_op_get_ts_info, 2319 .get_link_ksettings = virtnet_get_link_ksettings, 2320 .set_link_ksettings = virtnet_set_link_ksettings, 2321 .set_coalesce = virtnet_set_coalesce, 2322 .get_coalesce = virtnet_get_coalesce, 2323 }; 2324 2325 static void virtnet_freeze_down(struct virtio_device *vdev) 2326 { 2327 struct virtnet_info *vi = vdev->priv; 2328 int i; 2329 2330 /* Make sure no work handler is accessing the device */ 2331 flush_work(&vi->config_work); 2332 2333 netif_tx_lock_bh(vi->dev); 2334 netif_device_detach(vi->dev); 2335 netif_tx_unlock_bh(vi->dev); 2336 cancel_delayed_work_sync(&vi->refill); 2337 2338 if (netif_running(vi->dev)) { 2339 for (i = 0; i < vi->max_queue_pairs; i++) { 2340 napi_disable(&vi->rq[i].napi); 2341 virtnet_napi_tx_disable(&vi->sq[i].napi); 2342 } 2343 } 2344 } 2345 2346 static int init_vqs(struct virtnet_info *vi); 2347 2348 static int virtnet_restore_up(struct virtio_device *vdev) 2349 { 2350 struct virtnet_info *vi = vdev->priv; 2351 int err, i; 2352 2353 err = init_vqs(vi); 2354 if (err) 2355 return err; 2356 2357 virtio_device_ready(vdev); 2358 2359 if (netif_running(vi->dev)) { 2360 for (i = 0; i < vi->curr_queue_pairs; i++) 2361 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 2362 schedule_delayed_work(&vi->refill, 0); 2363 2364 for (i = 0; i < vi->max_queue_pairs; i++) { 2365 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 2366 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 2367 &vi->sq[i].napi); 2368 } 2369 } 2370 2371 netif_tx_lock_bh(vi->dev); 2372 netif_device_attach(vi->dev); 2373 
	netif_tx_unlock_bh(vi->dev);
	return err;
}

static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
{
	struct scatterlist sg;

	vi->ctrl->offloads = cpu_to_virtio64(vi->vdev, offloads);

	sg_init_one(&sg, &vi->ctrl->offloads, sizeof(vi->ctrl->offloads));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
				  VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
		dev_warn(&vi->dev->dev, "Failed to set guest offloads.\n");
		return -EINVAL;
	}

	return 0;
}

static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
{
	u64 offloads = 0;

	if (!vi->guest_offloads)
		return 0;

	return virtnet_set_guest_offloads(vi, offloads);
}

static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
{
	u64 offloads = vi->guest_offloads;

	if (!vi->guest_offloads)
		return 0;

	return virtnet_set_guest_offloads(vi, offloads);
}

static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
			   struct netlink_ext_ack *extack)
{
	unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
	struct virtnet_info *vi = netdev_priv(dev);
	struct bpf_prog *old_prog;
	u16 xdp_qp = 0, curr_qp;
	int i, err;

	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
	    && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
		NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first");
		return -EOPNOTSUPP;
	}

	if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
		NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
		return -EINVAL;
	}

	if (dev->mtu > max_sz) {
		NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
		netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
		return -EINVAL;
	}

	curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
	if (prog)
		xdp_qp = nr_cpu_ids;

	/* XDP requires extra queues for XDP_TX */
	if (curr_qp + xdp_qp > vi->max_queue_pairs) {
		NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
		netdev_warn(dev, "request %i queues but max is %i\n",
			    curr_qp + xdp_qp, vi->max_queue_pairs);
		return -ENOMEM;
	}

	old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
	if (!prog && !old_prog)
		return 0;

	if (prog) {
		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
		if (IS_ERR(prog))
			return PTR_ERR(prog);
	}

	/* Make sure NAPI is not using any XDP TX queues for RX.
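	 * NAPI is re-enabled further down, once the new program (or no
	 * program) has been published on every receive queue.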
*/ 2466 if (netif_running(dev)) { 2467 for (i = 0; i < vi->max_queue_pairs; i++) { 2468 napi_disable(&vi->rq[i].napi); 2469 virtnet_napi_tx_disable(&vi->sq[i].napi); 2470 } 2471 } 2472 2473 if (!prog) { 2474 for (i = 0; i < vi->max_queue_pairs; i++) { 2475 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 2476 if (i == 0) 2477 virtnet_restore_guest_offloads(vi); 2478 } 2479 synchronize_net(); 2480 } 2481 2482 err = _virtnet_set_queues(vi, curr_qp + xdp_qp); 2483 if (err) 2484 goto err; 2485 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 2486 vi->xdp_queue_pairs = xdp_qp; 2487 2488 if (prog) { 2489 for (i = 0; i < vi->max_queue_pairs; i++) { 2490 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 2491 if (i == 0 && !old_prog) 2492 virtnet_clear_guest_offloads(vi); 2493 } 2494 } 2495 2496 for (i = 0; i < vi->max_queue_pairs; i++) { 2497 if (old_prog) 2498 bpf_prog_put(old_prog); 2499 if (netif_running(dev)) { 2500 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 2501 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 2502 &vi->sq[i].napi); 2503 } 2504 } 2505 2506 return 0; 2507 2508 err: 2509 if (!prog) { 2510 virtnet_clear_guest_offloads(vi); 2511 for (i = 0; i < vi->max_queue_pairs; i++) 2512 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 2513 } 2514 2515 if (netif_running(dev)) { 2516 for (i = 0; i < vi->max_queue_pairs; i++) { 2517 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 2518 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 2519 &vi->sq[i].napi); 2520 } 2521 } 2522 if (prog) 2523 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 2524 return err; 2525 } 2526 2527 static u32 virtnet_xdp_query(struct net_device *dev) 2528 { 2529 struct virtnet_info *vi = netdev_priv(dev); 2530 const struct bpf_prog *xdp_prog; 2531 int i; 2532 2533 for (i = 0; i < vi->max_queue_pairs; i++) { 2534 xdp_prog = rtnl_dereference(vi->rq[i].xdp_prog); 2535 if (xdp_prog) 2536 return xdp_prog->aux->id; 2537 } 2538 return 0; 2539 } 2540 2541 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 2542 { 2543 switch (xdp->command) { 2544 case XDP_SETUP_PROG: 2545 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 2546 case XDP_QUERY_PROG: 2547 xdp->prog_id = virtnet_xdp_query(dev); 2548 return 0; 2549 default: 2550 return -EINVAL; 2551 } 2552 } 2553 2554 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 2555 size_t len) 2556 { 2557 struct virtnet_info *vi = netdev_priv(dev); 2558 int ret; 2559 2560 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 2561 return -EOPNOTSUPP; 2562 2563 ret = snprintf(buf, len, "sby"); 2564 if (ret >= len) 2565 return -EOPNOTSUPP; 2566 2567 return 0; 2568 } 2569 2570 static int virtnet_set_features(struct net_device *dev, 2571 netdev_features_t features) 2572 { 2573 struct virtnet_info *vi = netdev_priv(dev); 2574 u64 offloads; 2575 int err; 2576 2577 if ((dev->features ^ features) & NETIF_F_LRO) { 2578 if (vi->xdp_queue_pairs) 2579 return -EBUSY; 2580 2581 if (features & NETIF_F_LRO) 2582 offloads = vi->guest_offloads_capable; 2583 else 2584 offloads = 0; 2585 2586 err = virtnet_set_guest_offloads(vi, offloads); 2587 if (err) 2588 return err; 2589 vi->guest_offloads = offloads; 2590 } 2591 2592 return 0; 2593 } 2594 2595 static const struct net_device_ops virtnet_netdev = { 2596 .ndo_open = virtnet_open, 2597 .ndo_stop = virtnet_close, 2598 .ndo_start_xmit = start_xmit, 2599 .ndo_validate_addr = eth_validate_addr, 2600 .ndo_set_mac_address = virtnet_set_mac_address, 2601 .ndo_set_rx_mode = virtnet_set_rx_mode, 2602 .ndo_get_stats64 = virtnet_stats, 
2603 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 2604 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 2605 .ndo_bpf = virtnet_xdp, 2606 .ndo_xdp_xmit = virtnet_xdp_xmit, 2607 .ndo_features_check = passthru_features_check, 2608 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 2609 .ndo_set_features = virtnet_set_features, 2610 }; 2611 2612 static void virtnet_config_changed_work(struct work_struct *work) 2613 { 2614 struct virtnet_info *vi = 2615 container_of(work, struct virtnet_info, config_work); 2616 u16 v; 2617 2618 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 2619 struct virtio_net_config, status, &v) < 0) 2620 return; 2621 2622 if (v & VIRTIO_NET_S_ANNOUNCE) { 2623 netdev_notify_peers(vi->dev); 2624 virtnet_ack_link_announce(vi); 2625 } 2626 2627 /* Ignore unknown (future) status bits */ 2628 v &= VIRTIO_NET_S_LINK_UP; 2629 2630 if (vi->status == v) 2631 return; 2632 2633 vi->status = v; 2634 2635 if (vi->status & VIRTIO_NET_S_LINK_UP) { 2636 virtnet_update_settings(vi); 2637 netif_carrier_on(vi->dev); 2638 netif_tx_wake_all_queues(vi->dev); 2639 } else { 2640 netif_carrier_off(vi->dev); 2641 netif_tx_stop_all_queues(vi->dev); 2642 } 2643 } 2644 2645 static void virtnet_config_changed(struct virtio_device *vdev) 2646 { 2647 struct virtnet_info *vi = vdev->priv; 2648 2649 schedule_work(&vi->config_work); 2650 } 2651 2652 static void virtnet_free_queues(struct virtnet_info *vi) 2653 { 2654 int i; 2655 2656 for (i = 0; i < vi->max_queue_pairs; i++) { 2657 napi_hash_del(&vi->rq[i].napi); 2658 netif_napi_del(&vi->rq[i].napi); 2659 netif_napi_del(&vi->sq[i].napi); 2660 } 2661 2662 /* We called napi_hash_del() before netif_napi_del(), 2663 * we need to respect an RCU grace period before freeing vi->rq 2664 */ 2665 synchronize_net(); 2666 2667 kfree(vi->rq); 2668 kfree(vi->sq); 2669 kfree(vi->ctrl); 2670 } 2671 2672 static void _free_receive_bufs(struct virtnet_info *vi) 2673 { 2674 struct bpf_prog *old_prog; 2675 int i; 2676 2677 for (i = 0; i < vi->max_queue_pairs; i++) { 2678 while (vi->rq[i].pages) 2679 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 2680 2681 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 2682 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 2683 if (old_prog) 2684 bpf_prog_put(old_prog); 2685 } 2686 } 2687 2688 static void free_receive_bufs(struct virtnet_info *vi) 2689 { 2690 rtnl_lock(); 2691 _free_receive_bufs(vi); 2692 rtnl_unlock(); 2693 } 2694 2695 static void free_receive_page_frags(struct virtnet_info *vi) 2696 { 2697 int i; 2698 for (i = 0; i < vi->max_queue_pairs; i++) 2699 if (vi->rq[i].alloc_frag.page) 2700 put_page(vi->rq[i].alloc_frag.page); 2701 } 2702 2703 static void free_unused_bufs(struct virtnet_info *vi) 2704 { 2705 void *buf; 2706 int i; 2707 2708 for (i = 0; i < vi->max_queue_pairs; i++) { 2709 struct virtqueue *vq = vi->sq[i].vq; 2710 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { 2711 if (!is_xdp_frame(buf)) 2712 dev_kfree_skb(buf); 2713 else 2714 xdp_return_frame(ptr_to_xdp(buf)); 2715 } 2716 } 2717 2718 for (i = 0; i < vi->max_queue_pairs; i++) { 2719 struct virtqueue *vq = vi->rq[i].vq; 2720 2721 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { 2722 if (vi->mergeable_rx_bufs) { 2723 put_page(virt_to_head_page(buf)); 2724 } else if (vi->big_packets) { 2725 give_pages(&vi->rq[i], buf); 2726 } else { 2727 put_page(virt_to_head_page(buf)); 2728 } 2729 } 2730 } 2731 } 2732 2733 static void virtnet_del_vqs(struct virtnet_info *vi) 2734 { 2735 struct virtio_device *vdev = vi->vdev; 2736 2737 
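	/* Clear the CPU affinity hints first: they reference the virtqueues
	 * that are about to be deleted.
	 */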
virtnet_clean_affinity(vi, -1); 2738 2739 vdev->config->del_vqs(vdev); 2740 2741 virtnet_free_queues(vi); 2742 } 2743 2744 /* How large should a single buffer be so a queue full of these can fit at 2745 * least one full packet? 2746 * Logic below assumes the mergeable buffer header is used. 2747 */ 2748 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 2749 { 2750 const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 2751 unsigned int rq_size = virtqueue_get_vring_size(vq); 2752 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 2753 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 2754 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 2755 2756 return max(max(min_buf_len, hdr_len) - hdr_len, 2757 (unsigned int)GOOD_PACKET_LEN); 2758 } 2759 2760 static int virtnet_find_vqs(struct virtnet_info *vi) 2761 { 2762 vq_callback_t **callbacks; 2763 struct virtqueue **vqs; 2764 int ret = -ENOMEM; 2765 int i, total_vqs; 2766 const char **names; 2767 bool *ctx; 2768 2769 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 2770 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 2771 * possible control vq. 2772 */ 2773 total_vqs = vi->max_queue_pairs * 2 + 2774 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 2775 2776 /* Allocate space for find_vqs parameters */ 2777 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 2778 if (!vqs) 2779 goto err_vq; 2780 callbacks = kmalloc_array(total_vqs, sizeof(*callbacks), GFP_KERNEL); 2781 if (!callbacks) 2782 goto err_callback; 2783 names = kmalloc_array(total_vqs, sizeof(*names), GFP_KERNEL); 2784 if (!names) 2785 goto err_names; 2786 if (!vi->big_packets || vi->mergeable_rx_bufs) { 2787 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 2788 if (!ctx) 2789 goto err_ctx; 2790 } else { 2791 ctx = NULL; 2792 } 2793 2794 /* Parameters for control virtqueue, if any */ 2795 if (vi->has_cvq) { 2796 callbacks[total_vqs - 1] = NULL; 2797 names[total_vqs - 1] = "control"; 2798 } 2799 2800 /* Allocate/initialize parameters for send/receive virtqueues */ 2801 for (i = 0; i < vi->max_queue_pairs; i++) { 2802 callbacks[rxq2vq(i)] = skb_recv_done; 2803 callbacks[txq2vq(i)] = skb_xmit_done; 2804 sprintf(vi->rq[i].name, "input.%d", i); 2805 sprintf(vi->sq[i].name, "output.%d", i); 2806 names[rxq2vq(i)] = vi->rq[i].name; 2807 names[txq2vq(i)] = vi->sq[i].name; 2808 if (ctx) 2809 ctx[rxq2vq(i)] = true; 2810 } 2811 2812 ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, 2813 names, ctx, NULL); 2814 if (ret) 2815 goto err_find; 2816 2817 if (vi->has_cvq) { 2818 vi->cvq = vqs[total_vqs - 1]; 2819 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 2820 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 2821 } 2822 2823 for (i = 0; i < vi->max_queue_pairs; i++) { 2824 vi->rq[i].vq = vqs[rxq2vq(i)]; 2825 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 2826 vi->sq[i].vq = vqs[txq2vq(i)]; 2827 } 2828 2829 /* run here: ret == 0. 
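	 * The success path deliberately falls through to the labels below:
	 * the arrays allocated above are only needed while the virtqueues
	 * are being set up, so they are freed on both the success and the
	 * error paths.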
*/ 2830 2831 2832 err_find: 2833 kfree(ctx); 2834 err_ctx: 2835 kfree(names); 2836 err_names: 2837 kfree(callbacks); 2838 err_callback: 2839 kfree(vqs); 2840 err_vq: 2841 return ret; 2842 } 2843 2844 static int virtnet_alloc_queues(struct virtnet_info *vi) 2845 { 2846 int i; 2847 2848 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 2849 if (!vi->ctrl) 2850 goto err_ctrl; 2851 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 2852 if (!vi->sq) 2853 goto err_sq; 2854 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 2855 if (!vi->rq) 2856 goto err_rq; 2857 2858 INIT_DELAYED_WORK(&vi->refill, refill_work); 2859 for (i = 0; i < vi->max_queue_pairs; i++) { 2860 vi->rq[i].pages = NULL; 2861 netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll, 2862 napi_weight); 2863 netif_tx_napi_add(vi->dev, &vi->sq[i].napi, virtnet_poll_tx, 2864 napi_tx ? napi_weight : 0); 2865 2866 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 2867 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 2868 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 2869 2870 u64_stats_init(&vi->rq[i].stats.syncp); 2871 u64_stats_init(&vi->sq[i].stats.syncp); 2872 } 2873 2874 return 0; 2875 2876 err_rq: 2877 kfree(vi->sq); 2878 err_sq: 2879 kfree(vi->ctrl); 2880 err_ctrl: 2881 return -ENOMEM; 2882 } 2883 2884 static int init_vqs(struct virtnet_info *vi) 2885 { 2886 int ret; 2887 2888 /* Allocate send & receive queues */ 2889 ret = virtnet_alloc_queues(vi); 2890 if (ret) 2891 goto err; 2892 2893 ret = virtnet_find_vqs(vi); 2894 if (ret) 2895 goto err_free; 2896 2897 get_online_cpus(); 2898 virtnet_set_affinity(vi); 2899 put_online_cpus(); 2900 2901 return 0; 2902 2903 err_free: 2904 virtnet_free_queues(vi); 2905 err: 2906 return ret; 2907 } 2908 2909 #ifdef CONFIG_SYSFS 2910 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 2911 char *buf) 2912 { 2913 struct virtnet_info *vi = netdev_priv(queue->dev); 2914 unsigned int queue_index = get_netdev_rx_queue_index(queue); 2915 unsigned int headroom = virtnet_get_headroom(vi); 2916 unsigned int tailroom = headroom ? 
					      sizeof(struct skb_shared_info) : 0;
	struct ewma_pkt_len *avg;

	BUG_ON(queue_index >= vi->max_queue_pairs);
	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
	return sprintf(buf, "%u\n",
		       get_mergeable_buf_len(&vi->rq[queue_index], avg,
					     SKB_DATA_ALIGN(headroom + tailroom)));
}

static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
	__ATTR_RO(mergeable_rx_buffer_size);

static struct attribute *virtio_net_mrg_rx_attrs[] = {
	&mergeable_rx_buffer_size_attribute.attr,
	NULL
};

static const struct attribute_group virtio_net_mrg_rx_group = {
	.name = "virtio_net",
	.attrs = virtio_net_mrg_rx_attrs
};
#endif

static bool virtnet_fail_on_feature(struct virtio_device *vdev,
				    unsigned int fbit,
				    const char *fname, const char *dname)
{
	if (!virtio_has_feature(vdev, fbit))
		return false;

	dev_err(&vdev->dev, "device advertises feature %s but not %s",
		fname, dname);

	return true;
}

#define VIRTNET_FAIL_ON(vdev, fbit, dbit)			\
	virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)

static bool virtnet_validate_features(struct virtio_device *vdev)
{
	if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
	    (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
			     "VIRTIO_NET_F_CTRL_VQ") ||
	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
			     "VIRTIO_NET_F_CTRL_VQ") ||
	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
			     "VIRTIO_NET_F_CTRL_VQ") ||
	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
			     "VIRTIO_NET_F_CTRL_VQ"))) {
		return false;
	}

	return true;
}

#define MIN_MTU ETH_MIN_MTU
#define MAX_MTU ETH_MAX_MTU

static int virtnet_validate(struct virtio_device *vdev)
{
	if (!vdev->config->get) {
		dev_err(&vdev->dev, "%s failure: config access disabled\n",
			__func__);
		return -EINVAL;
	}

	if (!virtnet_validate_features(vdev))
		return -EINVAL;

	if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
		int mtu = virtio_cread16(vdev,
					 offsetof(struct virtio_net_config,
						  mtu));
		if (mtu < MIN_MTU)
			__virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
	}

	return 0;
}

static int virtnet_probe(struct virtio_device *vdev)
{
	int i, err = -ENOMEM;
	struct net_device *dev;
	struct virtnet_info *vi;
	u16 max_queue_pairs;
	int mtu;

	/* Find out whether the host supports a multiqueue virtio_net device */
	err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
				   struct virtio_net_config,
				   max_virtqueue_pairs, &max_queue_pairs);

	/* We need at least 2 queues */
	if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
	    max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
	    !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
		max_queue_pairs = 1;

	/* Allocate ourselves a network device with room for our info */
	dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
	if (!dev)
		return -ENOMEM;

	/* Set up network device as normal. */
	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
	dev->netdev_ops = &virtnet_netdev;
	dev->features = NETIF_F_HIGHDMA;

	dev->ethtool_ops = &virtnet_ethtool_ops;
	SET_NETDEV_DEV(dev, &vdev->dev);

	/* Do we support "hardware" checksums?
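	 * VIRTIO_NET_F_CSUM means the host accepts partially checksummed
	 * packets from us, which is the prerequisite for the SG/TSO features
	 * advertised below.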
*/ 3032 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 3033 /* This opens up the world of extra features. */ 3034 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 3035 if (csum) 3036 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 3037 3038 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 3039 dev->hw_features |= NETIF_F_TSO 3040 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 3041 } 3042 /* Individual feature bits: what can host handle? */ 3043 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 3044 dev->hw_features |= NETIF_F_TSO; 3045 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 3046 dev->hw_features |= NETIF_F_TSO6; 3047 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 3048 dev->hw_features |= NETIF_F_TSO_ECN; 3049 3050 dev->features |= NETIF_F_GSO_ROBUST; 3051 3052 if (gso) 3053 dev->features |= dev->hw_features & NETIF_F_ALL_TSO; 3054 /* (!csum && gso) case will be fixed by register_netdev() */ 3055 } 3056 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM)) 3057 dev->features |= NETIF_F_RXCSUM; 3058 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 3059 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 3060 dev->features |= NETIF_F_LRO; 3061 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 3062 dev->hw_features |= NETIF_F_LRO; 3063 3064 dev->vlan_features = dev->features; 3065 3066 /* MTU range: 68 - 65535 */ 3067 dev->min_mtu = MIN_MTU; 3068 dev->max_mtu = MAX_MTU; 3069 3070 /* Configuration may specify what MAC to use. Otherwise random. */ 3071 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) 3072 virtio_cread_bytes(vdev, 3073 offsetof(struct virtio_net_config, mac), 3074 dev->dev_addr, dev->addr_len); 3075 else 3076 eth_hw_addr_random(dev); 3077 3078 /* Set up our device-specific information */ 3079 vi = netdev_priv(dev); 3080 vi->dev = dev; 3081 vi->vdev = vdev; 3082 vdev->priv = vi; 3083 3084 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 3085 3086 /* If we can receive ANY GSO packets, we must allocate large ones. */ 3087 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 3088 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) || 3089 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) || 3090 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO)) 3091 vi->big_packets = true; 3092 3093 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) 3094 vi->mergeable_rx_bufs = true; 3095 3096 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 3097 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 3098 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3099 else 3100 vi->hdr_len = sizeof(struct virtio_net_hdr); 3101 3102 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 3103 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 3104 vi->any_header_sg = true; 3105 3106 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 3107 vi->has_cvq = true; 3108 3109 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 3110 mtu = virtio_cread16(vdev, 3111 offsetof(struct virtio_net_config, 3112 mtu)); 3113 if (mtu < dev->min_mtu) { 3114 /* Should never trigger: MTU was previously validated 3115 * in virtnet_validate. 3116 */ 3117 dev_err(&vdev->dev, "device MTU appears to have changed " 3118 "it is now %d < %d", mtu, dev->min_mtu); 3119 goto free; 3120 } 3121 3122 dev->mtu = mtu; 3123 dev->max_mtu = mtu; 3124 3125 /* TODO: size buffers correctly in this case. 
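	 * For now, any device MTU above the standard Ethernet payload size
	 * simply forces the big_packets receive path so the buffers are
	 * large enough.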
*/ 3126 if (dev->mtu > ETH_DATA_LEN) 3127 vi->big_packets = true; 3128 } 3129 3130 if (vi->any_header_sg) 3131 dev->needed_headroom = vi->hdr_len; 3132 3133 /* Enable multiqueue by default */ 3134 if (num_online_cpus() >= max_queue_pairs) 3135 vi->curr_queue_pairs = max_queue_pairs; 3136 else 3137 vi->curr_queue_pairs = num_online_cpus(); 3138 vi->max_queue_pairs = max_queue_pairs; 3139 3140 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ 3141 err = init_vqs(vi); 3142 if (err) 3143 goto free; 3144 3145 #ifdef CONFIG_SYSFS 3146 if (vi->mergeable_rx_bufs) 3147 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; 3148 #endif 3149 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); 3150 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); 3151 3152 virtnet_init_settings(dev); 3153 3154 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 3155 vi->failover = net_failover_create(vi->dev); 3156 if (IS_ERR(vi->failover)) { 3157 err = PTR_ERR(vi->failover); 3158 goto free_vqs; 3159 } 3160 } 3161 3162 err = register_netdev(dev); 3163 if (err) { 3164 pr_debug("virtio_net: registering device failed\n"); 3165 goto free_failover; 3166 } 3167 3168 virtio_device_ready(vdev); 3169 3170 err = virtnet_cpu_notif_add(vi); 3171 if (err) { 3172 pr_debug("virtio_net: registering cpu notifier failed\n"); 3173 goto free_unregister_netdev; 3174 } 3175 3176 virtnet_set_queues(vi, vi->curr_queue_pairs); 3177 3178 /* Assume link up if device can't report link status, 3179 otherwise get link status from config. */ 3180 netif_carrier_off(dev); 3181 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 3182 schedule_work(&vi->config_work); 3183 } else { 3184 vi->status = VIRTIO_NET_S_LINK_UP; 3185 virtnet_update_settings(vi); 3186 netif_carrier_on(dev); 3187 } 3188 3189 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) 3190 if (virtio_has_feature(vi->vdev, guest_offloads[i])) 3191 set_bit(guest_offloads[i], &vi->guest_offloads); 3192 vi->guest_offloads_capable = vi->guest_offloads; 3193 3194 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", 3195 dev->name, max_queue_pairs); 3196 3197 return 0; 3198 3199 free_unregister_netdev: 3200 vi->vdev->config->reset(vdev); 3201 3202 unregister_netdev(dev); 3203 free_failover: 3204 net_failover_destroy(vi->failover); 3205 free_vqs: 3206 cancel_delayed_work_sync(&vi->refill); 3207 free_receive_page_frags(vi); 3208 virtnet_del_vqs(vi); 3209 free: 3210 free_netdev(dev); 3211 return err; 3212 } 3213 3214 static void remove_vq_common(struct virtnet_info *vi) 3215 { 3216 vi->vdev->config->reset(vi->vdev); 3217 3218 /* Free unused buffers in both send and recv, if any. */ 3219 free_unused_bufs(vi); 3220 3221 free_receive_bufs(vi); 3222 3223 free_receive_page_frags(vi); 3224 3225 virtnet_del_vqs(vi); 3226 } 3227 3228 static void virtnet_remove(struct virtio_device *vdev) 3229 { 3230 struct virtnet_info *vi = vdev->priv; 3231 3232 virtnet_cpu_notif_remove(vi); 3233 3234 /* Make sure no work handler is accessing the device. 
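	 * virtnet_config_changed_work() runs off the system workqueue and
	 * dereferences vi, so it must complete before teardown begins.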
*/ 3235 flush_work(&vi->config_work); 3236 3237 unregister_netdev(vi->dev); 3238 3239 net_failover_destroy(vi->failover); 3240 3241 remove_vq_common(vi); 3242 3243 free_netdev(vi->dev); 3244 } 3245 3246 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) 3247 { 3248 struct virtnet_info *vi = vdev->priv; 3249 3250 virtnet_cpu_notif_remove(vi); 3251 virtnet_freeze_down(vdev); 3252 remove_vq_common(vi); 3253 3254 return 0; 3255 } 3256 3257 static __maybe_unused int virtnet_restore(struct virtio_device *vdev) 3258 { 3259 struct virtnet_info *vi = vdev->priv; 3260 int err; 3261 3262 err = virtnet_restore_up(vdev); 3263 if (err) 3264 return err; 3265 virtnet_set_queues(vi, vi->curr_queue_pairs); 3266 3267 err = virtnet_cpu_notif_add(vi); 3268 if (err) 3269 return err; 3270 3271 return 0; 3272 } 3273 3274 static struct virtio_device_id id_table[] = { 3275 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 3276 { 0 }, 3277 }; 3278 3279 #define VIRTNET_FEATURES \ 3280 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ 3281 VIRTIO_NET_F_MAC, \ 3282 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ 3283 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ 3284 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ 3285 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ 3286 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ 3287 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ 3288 VIRTIO_NET_F_CTRL_MAC_ADDR, \ 3289 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ 3290 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY 3291 3292 static unsigned int features[] = { 3293 VIRTNET_FEATURES, 3294 }; 3295 3296 static unsigned int features_legacy[] = { 3297 VIRTNET_FEATURES, 3298 VIRTIO_NET_F_GSO, 3299 VIRTIO_F_ANY_LAYOUT, 3300 }; 3301 3302 static struct virtio_driver virtio_net_driver = { 3303 .feature_table = features, 3304 .feature_table_size = ARRAY_SIZE(features), 3305 .feature_table_legacy = features_legacy, 3306 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 3307 .driver.name = KBUILD_MODNAME, 3308 .driver.owner = THIS_MODULE, 3309 .id_table = id_table, 3310 .validate = virtnet_validate, 3311 .probe = virtnet_probe, 3312 .remove = virtnet_remove, 3313 .config_changed = virtnet_config_changed, 3314 #ifdef CONFIG_PM_SLEEP 3315 .freeze = virtnet_freeze, 3316 .restore = virtnet_restore, 3317 #endif 3318 }; 3319 3320 static __init int virtio_net_driver_init(void) 3321 { 3322 int ret; 3323 3324 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", 3325 virtnet_cpu_online, 3326 virtnet_cpu_down_prep); 3327 if (ret < 0) 3328 goto out; 3329 virtionet_online = ret; 3330 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", 3331 NULL, virtnet_cpu_dead); 3332 if (ret) 3333 goto err_dead; 3334 3335 ret = register_virtio_driver(&virtio_net_driver); 3336 if (ret) 3337 goto err_virtio; 3338 return 0; 3339 err_virtio: 3340 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 3341 err_dead: 3342 cpuhp_remove_multi_state(virtionet_online); 3343 out: 3344 return ret; 3345 } 3346 module_init(virtio_net_driver_init); 3347 3348 static __exit void virtio_net_driver_exit(void) 3349 { 3350 unregister_virtio_driver(&virtio_net_driver); 3351 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 3352 cpuhp_remove_multi_state(virtionet_online); 3353 } 3354 module_exit(virtio_net_driver_exit); 3355 3356 MODULE_DEVICE_TABLE(virtio, id_table); 3357 MODULE_DESCRIPTION("Virtio network driver"); 3358 MODULE_LICENSE("GPL"); 3359
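/*
 * Usage sketch (not part of the driver): typical userspace commands that
 * exercise the hooks implemented above.  The interface name "eth0" and the
 * BPF object file "prog.o" are placeholders.
 *
 *   ethtool -L eth0 combined 4                    # virtnet_set_channels()
 *   ethtool -C eth0 tx-frames 1                   # virtnet_set_coalesce(); toggling
 *                                                 # TX NAPI needs the device down
 *   ethtool -S eth0                               # virtnet_get_ethtool_stats()
 *   ip link set dev eth0 xdp obj prog.o sec xdp   # virtnet_xdp_set() via .ndo_bpf
 */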