// SPDX-License-Identifier: GPL-2.0-or-later
/* A network driver using virtio.
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 */
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/scatterlist.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/average.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <net/route.h>
#include <net/xdp.h>
#include <net/net_failover.h>
#include <net/netdev_rx_queue.h>

static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);

static bool csum = true, gso = true, napi_tx = true;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);
module_param(napi_tx, bool, 0644);

/* FIXME: MTU in config. */
#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
#define GOOD_COPY_LEN 128

#define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)

/* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
#define VIRTIO_XDP_HEADROOM 256

/* Separating two types of XDP xmit */
#define VIRTIO_XDP_TX BIT(0)
#define VIRTIO_XDP_REDIR BIT(1)

#define VIRTIO_XDP_FLAG BIT(0)

/* RX packet size EWMA. The average packet size is used to determine the packet
 * buffer size when refilling RX rings. As the entire RX ring may be refilled
 * at once, the weight is chosen so that the EWMA will be insensitive to short-
 * term, transient changes in packet size.
 */
DECLARE_EWMA(pkt_len, 0, 64)

#define VIRTNET_DRIVER_VERSION "1.0.0"

static const unsigned long guest_offloads[] = {
	VIRTIO_NET_F_GUEST_TSO4,
	VIRTIO_NET_F_GUEST_TSO6,
	VIRTIO_NET_F_GUEST_ECN,
	VIRTIO_NET_F_GUEST_UFO,
	VIRTIO_NET_F_GUEST_CSUM,
	VIRTIO_NET_F_GUEST_USO4,
	VIRTIO_NET_F_GUEST_USO6,
	VIRTIO_NET_F_GUEST_HDRLEN
};

#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
				   (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
				   (1ULL << VIRTIO_NET_F_GUEST_ECN)  | \
				   (1ULL << VIRTIO_NET_F_GUEST_UFO)  | \
				   (1ULL << VIRTIO_NET_F_GUEST_USO4) | \
				   (1ULL << VIRTIO_NET_F_GUEST_USO6))

struct virtnet_stat_desc {
	char desc[ETH_GSTRING_LEN];
	size_t offset;
};

struct virtnet_sq_stats {
	struct u64_stats_sync syncp;
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t xdp_tx;
	u64_stats_t xdp_tx_drops;
	u64_stats_t kicks;
	u64_stats_t tx_timeouts;
};

struct virtnet_rq_stats {
	struct u64_stats_sync syncp;
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t drops;
	u64_stats_t xdp_packets;
	u64_stats_t xdp_tx;
	u64_stats_t xdp_redirects;
	u64_stats_t xdp_drops;
	u64_stats_t kicks;
};

#define VIRTNET_SQ_STAT(m) offsetof(struct virtnet_sq_stats, m)
#define VIRTNET_RQ_STAT(m) offsetof(struct virtnet_rq_stats, m)
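/* Note: these descriptor tables drive the per-queue ethtool statistics. The
 * ethtool callbacks walk them to emit one string/value pair per entry
 * (roughly "rx_queue_<n>_<desc>" / "tx_queue_<n>_<desc>"), reading each
 * counter at the recorded structure offset.
 */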
"packets", VIRTNET_RQ_STAT(packets) }, 118 { "bytes", VIRTNET_RQ_STAT(bytes) }, 119 { "drops", VIRTNET_RQ_STAT(drops) }, 120 { "xdp_packets", VIRTNET_RQ_STAT(xdp_packets) }, 121 { "xdp_tx", VIRTNET_RQ_STAT(xdp_tx) }, 122 { "xdp_redirects", VIRTNET_RQ_STAT(xdp_redirects) }, 123 { "xdp_drops", VIRTNET_RQ_STAT(xdp_drops) }, 124 { "kicks", VIRTNET_RQ_STAT(kicks) }, 125 }; 126 127 #define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc) 128 #define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc) 129 130 struct virtnet_interrupt_coalesce { 131 u32 max_packets; 132 u32 max_usecs; 133 }; 134 135 /* The dma information of pages allocated at a time. */ 136 struct virtnet_rq_dma { 137 dma_addr_t addr; 138 u32 ref; 139 u16 len; 140 u16 need_sync; 141 }; 142 143 /* Internal representation of a send virtqueue */ 144 struct send_queue { 145 /* Virtqueue associated with this send _queue */ 146 struct virtqueue *vq; 147 148 /* TX: fragments + linear part + virtio header */ 149 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 150 151 /* Name of the send queue: output.$index */ 152 char name[16]; 153 154 struct virtnet_sq_stats stats; 155 156 struct virtnet_interrupt_coalesce intr_coal; 157 158 struct napi_struct napi; 159 160 /* Record whether sq is in reset state. */ 161 bool reset; 162 }; 163 164 /* Internal representation of a receive virtqueue */ 165 struct receive_queue { 166 /* Virtqueue associated with this receive_queue */ 167 struct virtqueue *vq; 168 169 struct napi_struct napi; 170 171 struct bpf_prog __rcu *xdp_prog; 172 173 struct virtnet_rq_stats stats; 174 175 struct virtnet_interrupt_coalesce intr_coal; 176 177 /* Chain pages by the private ptr. */ 178 struct page *pages; 179 180 /* Average packet length for mergeable receive buffers. */ 181 struct ewma_pkt_len mrg_avg_pkt_len; 182 183 /* Page frag for packet buffer allocation. */ 184 struct page_frag alloc_frag; 185 186 /* RX: fragments + linear part + virtio header */ 187 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 188 189 /* Min single buffer size for mergeable buffers case. */ 190 unsigned int min_buf_len; 191 192 /* Name of this receive queue: input.$index */ 193 char name[16]; 194 195 struct xdp_rxq_info xdp_rxq; 196 197 /* Record the last dma info to free after new pages is allocated. */ 198 struct virtnet_rq_dma *last_dma; 199 200 /* Do dma by self */ 201 bool do_dma; 202 }; 203 204 /* This structure can contain rss message with maximum settings for indirection table and keysize 205 * Note, that default structure that describes RSS configuration virtio_net_rss_config 206 * contains same info but can't handle table values. 207 * In any case, structure would be passed to virtio hw through sg_buf split by parts 208 * because table sizes may be differ according to the device configuration. 
/* This structure can contain an RSS message with the maximum settings for the
 * indirection table and key size.
 * Note that the default structure describing the RSS configuration,
 * virtio_net_rss_config, carries the same info but cannot hold the table values.
 * In any case, the structure is passed to the virtio hw through sg_buf split
 * into parts, because the table sizes may differ depending on the device
 * configuration.
 */
#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
#define VIRTIO_NET_RSS_MAX_TABLE_LEN 128
struct virtio_net_ctrl_rss {
	u32 hash_types;
	u16 indirection_table_mask;
	u16 unclassified_queue;
	u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN];
	u16 max_tx_vq;
	u8 hash_key_length;
	u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
};

/* Control VQ buffers: protected by the rtnl lock */
struct control_buf {
	struct virtio_net_ctrl_hdr hdr;
	virtio_net_ctrl_ack status;
	struct virtio_net_ctrl_mq mq;
	u8 promisc;
	u8 allmulti;
	__virtio16 vid;
	__virtio64 offloads;
	struct virtio_net_ctrl_rss rss;
	struct virtio_net_ctrl_coal_tx coal_tx;
	struct virtio_net_ctrl_coal_rx coal_rx;
	struct virtio_net_ctrl_coal_vq coal_vq;
};

struct virtnet_info {
	struct virtio_device *vdev;
	struct virtqueue *cvq;
	struct net_device *dev;
	struct send_queue *sq;
	struct receive_queue *rq;
	unsigned int status;

	/* Max # of queue pairs supported by the device */
	u16 max_queue_pairs;

	/* # of queue pairs currently used by the driver */
	u16 curr_queue_pairs;

	/* # of XDP queue pairs currently used by the driver */
	u16 xdp_queue_pairs;

	/* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */
	bool xdp_enabled;

	/* I like... big packets and I cannot lie! */
	bool big_packets;

	/* number of sg entries allocated for big packets */
	unsigned int big_packets_num_skbfrags;

	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

	/* Host supports rss and/or hash report */
	bool has_rss;
	bool has_rss_hash_report;
	u8 rss_key_size;
	u16 rss_indir_table_size;
	u32 rss_hash_types_supported;
	u32 rss_hash_types_saved;

	/* Has control virtqueue */
	bool has_cvq;

	/* Host can handle any s/g split between our header and packet data */
	bool any_header_sg;

	/* Packet virtio header size */
	u8 hdr_len;

	/* Work struct for delayed refilling if we run low on memory. */
	struct delayed_work refill;

	/* Is delayed refill enabled? */
	bool refill_enabled;

	/* The lock to synchronize the access to refill_enabled */
	spinlock_t refill_lock;

	/* Work struct for config space updates */
	struct work_struct config_work;

	/* Is the affinity hint set for virtqueues? */
	bool affinity_hint_set;

	/* CPU hotplug instances for online & dead */
	struct hlist_node node;
	struct hlist_node node_dead;

	struct control_buf *ctrl;

	/* Ethtool settings */
	u8 duplex;
	u32 speed;

	/* Interrupt coalescing settings */
	struct virtnet_interrupt_coalesce intr_coal_tx;
	struct virtnet_interrupt_coalesce intr_coal_rx;

	unsigned long guest_offloads;
	unsigned long guest_offloads_capable;

	/* failover when the STANDBY feature is enabled */
	struct failover *failover;
};
struct padded_vnet_hdr {
	struct virtio_net_hdr_v1_hash hdr;
	/*
	 * hdr is in a separate sg buffer, and the data sg buffer shares the
	 * same page with this header sg. This padding makes the next sg
	 * 16 byte aligned after the header.
	 */
	char padding[12];
};

struct virtio_net_common_hdr {
	union {
		struct virtio_net_hdr hdr;
		struct virtio_net_hdr_mrg_rxbuf mrg_hdr;
		struct virtio_net_hdr_v1_hash hash_v1_hdr;
	};
};

static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);

static bool is_xdp_frame(void *ptr)
{
	return (unsigned long)ptr & VIRTIO_XDP_FLAG;
}

static void *xdp_to_ptr(struct xdp_frame *ptr)
{
	return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
}

static struct xdp_frame *ptr_to_xdp(void *ptr)
{
	return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
}

/* Converting between virtqueue no. and kernel tx/rx queue no.
 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
 */
static int vq2txq(struct virtqueue *vq)
{
	return (vq->index - 1) / 2;
}

static int txq2vq(int txq)
{
	return txq * 2 + 1;
}

static int vq2rxq(struct virtqueue *vq)
{
	return vq->index / 2;
}

static int rxq2vq(int rxq)
{
	return rxq * 2;
}

static inline struct virtio_net_common_hdr *
skb_vnet_common_hdr(struct sk_buff *skb)
{
	return (struct virtio_net_common_hdr *)skb->cb;
}

/*
 * private is used to chain pages for big packets; put the whole
 * most recently used list at the beginning for reuse
 */
static void give_pages(struct receive_queue *rq, struct page *page)
{
	struct page *end;

	/* Find end of list, sew whole thing into vi->rq.pages. */
	for (end = page; end->private; end = (struct page *)end->private);
	end->private = (unsigned long)rq->pages;
	rq->pages = page;
}

static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
{
	struct page *p = rq->pages;

	if (p) {
		rq->pages = (struct page *)p->private;
		/* clear private here, it is used to chain pages */
		p->private = 0;
	} else
		p = alloc_page(gfp_mask);
	return p;
}

static void virtnet_rq_free_buf(struct virtnet_info *vi,
				struct receive_queue *rq, void *buf)
{
	if (vi->mergeable_rx_bufs)
		put_page(virt_to_head_page(buf));
	else if (vi->big_packets)
		give_pages(rq, buf);
	else
		put_page(virt_to_head_page(buf));
}

static void enable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = true;
	spin_unlock_bh(&vi->refill_lock);
}

static void disable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = false;
	spin_unlock_bh(&vi->refill_lock);
}

static void virtqueue_napi_schedule(struct napi_struct *napi,
				    struct virtqueue *vq)
{
	if (napi_schedule_prep(napi)) {
		virtqueue_disable_cb(vq);
		__napi_schedule(napi);
	}
}

static void virtqueue_napi_complete(struct napi_struct *napi,
				    struct virtqueue *vq, int processed)
{
	int opaque;

	opaque = virtqueue_enable_cb_prepare(vq);
	if (napi_complete_done(napi, processed)) {
		if (unlikely(virtqueue_poll(vq, opaque)))
			virtqueue_napi_schedule(napi, vq);
	} else {
		virtqueue_disable_cb(vq);
	}
}
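/* TX completion callback for a send virtqueue: either hand the work to the
 * TX NAPI instance (napi_tx mode) or simply wake the subqueue so that
 * start_xmit can reclaim the ring on its next run.
 */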
static void skb_xmit_done(struct virtqueue *vq)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;

	/* Suppress further interrupts. */
	virtqueue_disable_cb(vq);

	if (napi->weight)
		virtqueue_napi_schedule(napi, vq);
	else
		/* We were probably waiting for more output buffers. */
		netif_wake_subqueue(vi->dev, vq2txq(vq));
}

#define MRG_CTX_HEADER_SHIFT 22
static void *mergeable_len_to_ctx(unsigned int truesize,
				  unsigned int headroom)
{
	return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
}

static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
{
	return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
}

static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
{
	return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
}
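/* The mergeable-buffer context is a plain scalar smuggled through the
 * virtqueue ctx pointer: the low 22 bits hold the truesize and the remaining
 * high bits hold the headroom. For example, truesize = 1536 with a 256 byte
 * headroom is encoded as (256 << 22) | 1536 and decoded by the two helpers
 * above.
 */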
static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
					 unsigned int headroom,
					 unsigned int len)
{
	struct sk_buff *skb;

	skb = build_skb(buf, buflen);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, headroom);
	skb_put(skb, len);

	return skb;
}

/* Called from bottom half context */
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct page *page, unsigned int offset,
				   unsigned int len, unsigned int truesize,
				   unsigned int headroom)
{
	struct sk_buff *skb;
	struct virtio_net_common_hdr *hdr;
	unsigned int copy, hdr_len, hdr_padded_len;
	struct page *page_to_free = NULL;
	int tailroom, shinfo_size;
	char *p, *hdr_p, *buf;

	p = page_address(page) + offset;
	hdr_p = p;

	hdr_len = vi->hdr_len;
	if (vi->mergeable_rx_bufs)
		hdr_padded_len = hdr_len;
	else
		hdr_padded_len = sizeof(struct padded_vnet_hdr);

	buf = p - headroom;
	len -= hdr_len;
	offset += hdr_padded_len;
	p += hdr_padded_len;
	tailroom = truesize - headroom - hdr_padded_len - len;

	shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	/* Large packet with room for skb_shared_info: build the skb around
	 * the existing buffer so no data needs to be copied.
	 */
	if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
		skb = virtnet_build_skb(buf, truesize, p - buf, len);
		if (unlikely(!skb))
			return NULL;

		page = (struct page *)page->private;
		if (page)
			give_pages(rq, page);
		goto ok;
	}

	/* copy small packet so we can reuse these pages for small data */
	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
	if (unlikely(!skb))
		return NULL;

	/* Copy all frame if it fits skb->head, otherwise
	 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
	 */
	if (len <= skb_tailroom(skb))
		copy = len;
	else
		copy = ETH_HLEN;
	skb_put_data(skb, p, copy);

	len -= copy;
	offset += copy;

	if (vi->mergeable_rx_bufs) {
		if (len)
			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
		else
			page_to_free = page;
		goto ok;
	}

	/*
	 * Verify that we can indeed put this data into a skb.
	 * This is here to handle cases when the device erroneously
	 * tries to receive more than is possible. This is usually
	 * the case of a broken device.
	 */
	if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
		net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
		dev_kfree_skb(skb);
		return NULL;
	}
	BUG_ON(offset >= PAGE_SIZE);
	while (len) {
		unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
				frag_size, truesize);
		len -= frag_size;
		page = (struct page *)page->private;
		offset = 0;
	}

	if (page)
		give_pages(rq, page);

ok:
	hdr = skb_vnet_common_hdr(skb);
	memcpy(hdr, hdr_p, hdr_len);
	if (page_to_free)
		put_page(page_to_free);

	return skb;
}

static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len)
{
	struct page *page = virt_to_head_page(buf);
	struct virtnet_rq_dma *dma;
	void *head;
	int offset;

	head = page_address(page);

	dma = head;

	--dma->ref;

	if (dma->need_sync && len) {
		offset = buf - (head + sizeof(*dma));

		virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr,
							offset, len,
							DMA_FROM_DEVICE);
	}

	if (dma->ref)
		return;

	virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len,
					 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
	put_page(page);
}

static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
{
	void *buf;

	buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
	if (buf && rq->do_dma)
		virtnet_rq_unmap(rq, buf, *len);

	return buf;
}

static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
{
	struct virtnet_rq_dma *dma;
	dma_addr_t addr;
	u32 offset;
	void *head;

	if (!rq->do_dma) {
		sg_init_one(rq->sg, buf, len);
		return;
	}

	head = page_address(rq->alloc_frag.page);

	offset = buf - head;

	dma = head;

	addr = dma->addr - sizeof(*dma) + offset;

	sg_init_table(rq->sg, 1);
	rq->sg[0].dma_address = addr;
	rq->sg[0].length = len;
}

static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
{
	struct page_frag *alloc_frag = &rq->alloc_frag;
	struct virtnet_rq_dma *dma;
	void *buf, *head;
	dma_addr_t addr;

	if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp)))
		return NULL;

	head = page_address(alloc_frag->page);

	if (rq->do_dma) {
		dma = head;

		/* new pages */
		if (!alloc_frag->offset) {
			if (rq->last_dma) {
				/* Now that the new page is allocated, the
				 * previous dma will not be used any more, so
				 * it can be unmapped once its ref drops to 0.
				 */
				virtnet_rq_unmap(rq, rq->last_dma, 0);
				rq->last_dma = NULL;
			}

			dma->len = alloc_frag->size - sizeof(*dma);

			addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1,
							      dma->len, DMA_FROM_DEVICE, 0);
			if (virtqueue_dma_mapping_error(rq->vq, addr))
				return NULL;

			dma->addr = addr;
			dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr);

			/* Add a reference to dma to prevent the entire dma from
			 * being released during error handling. This reference
			 * will be freed after the pages are no longer used.
			 */
			get_page(alloc_frag->page);
			dma->ref = 1;
			alloc_frag->offset = sizeof(*dma);

			rq->last_dma = dma;
		}

		++dma->ref;
	}

	buf = head + alloc_frag->offset;

	get_page(alloc_frag->page);
	alloc_frag->offset += size;

	return buf;
}

static void virtnet_rq_set_premapped(struct virtnet_info *vi)
{
	int i;

	/* disable for big mode */
	if (!vi->mergeable_rx_bufs && vi->big_packets)
		return;

	for (i = 0; i < vi->max_queue_pairs; i++) {
		if (virtqueue_set_dma_premapped(vi->rq[i].vq))
			continue;

		vi->rq[i].do_dma = true;
	}
}

static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct receive_queue *rq;
	int i = vq2rxq(vq);

	rq = &vi->rq[i];

	if (rq->do_dma)
		virtnet_rq_unmap(rq, buf, 0);

	virtnet_rq_free_buf(vi, rq, buf);
}

static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
{
	unsigned int len;
	unsigned int packets = 0;
	unsigned int bytes = 0;
	void *ptr;

	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		if (likely(!is_xdp_frame(ptr))) {
			struct sk_buff *skb = ptr;

			pr_debug("Sent skb %p\n", skb);

			bytes += skb->len;
			napi_consume_skb(skb, in_napi);
		} else {
			struct xdp_frame *frame = ptr_to_xdp(ptr);

			bytes += xdp_get_frame_len(frame);
			xdp_return_frame(frame);
		}
		packets++;
	}

	/* Avoid overhead when no packets have been processed; this
	 * happens when we are called speculatively from start_xmit.
	 */
	if (!packets)
		return;

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, bytes);
	u64_stats_add(&sq->stats.packets, packets);
	u64_stats_update_end(&sq->stats.syncp);
}
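/* Return true when queue index q is one of the queues dedicated to XDP_TX:
 * the last xdp_queue_pairs queue pairs within curr_queue_pairs carry raw XDP
 * frames rather than skbs, so the skb reclaim paths skip them.
 */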
static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
{
	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
		return false;
	else if (q < vi->curr_queue_pairs)
		return true;
	else
		return false;
}

static void check_sq_full_and_disable(struct virtnet_info *vi,
				      struct net_device *dev,
				      struct send_queue *sq)
{
	bool use_napi = sq->napi.weight;
	int qnum;

	qnum = sq - vi->sq;

	/* If running out of space, stop queue to avoid getting packets that we
	 * are then unable to transmit.
	 * An alternative would be to force queuing layer to requeue the skb by
	 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
	 * returned in a normal path of operation: it means that driver is not
	 * maintaining the TX queue stop/start state properly, and causes
	 * the stack to do a non-trivial amount of useless work.
	 * Since most packets only take 1 or 2 ring slots, stopping the queue
	 * early means 16 slots are typically wasted.
	 */
	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
		netif_stop_subqueue(dev, qnum);
		if (use_napi) {
			if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
				virtqueue_napi_schedule(&sq->napi, sq->vq);
		} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
			/* More just got used, free them then recheck. */
			free_old_xmit_skbs(sq, false);
			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
				netif_start_subqueue(dev, qnum);
				virtqueue_disable_cb(sq->vq);
			}
		}
	}
}

static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
				  struct send_queue *sq,
				  struct xdp_frame *xdpf)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct skb_shared_info *shinfo;
	u8 nr_frags = 0;
	int err, i;

	if (unlikely(xdpf->headroom < vi->hdr_len))
		return -EOVERFLOW;

	if (unlikely(xdp_frame_has_frags(xdpf))) {
		shinfo = xdp_get_shared_info_from_frame(xdpf);
		nr_frags = shinfo->nr_frags;
	}

	/* In the wrapping function virtnet_xdp_xmit(), we need to free
	 * up the pending old buffers, where we need to calculate the
	 * position of skb_shared_info in xdp_get_frame_len() and
	 * xdp_return_frame(), which involve xdpf->data and
	 * xdpf->headroom. Therefore, we need to update the value of
	 * headroom synchronously here.
	 */
	xdpf->headroom -= vi->hdr_len;
	xdpf->data -= vi->hdr_len;
	/* Zero header and leave csum up to XDP layers */
	hdr = xdpf->data;
	memset(hdr, 0, vi->hdr_len);
	xdpf->len += vi->hdr_len;

	sg_init_table(sq->sg, nr_frags + 1);
	sg_set_buf(sq->sg, xdpf->data, xdpf->len);
	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = &shinfo->frags[i];

		sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
			    skb_frag_size(frag), skb_frag_off(frag));
	}

	err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1,
				   xdp_to_ptr(xdpf), GFP_ATOMIC);
	if (unlikely(err))
		return -ENOSPC; /* Caller handle free/refcnt */

	return 0;
}

/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
 * the current cpu, so it does not need to be locked.
 *
 * Here we use a macro instead of inline functions because we have to deal with
 * three issues at the same time: 1. the choice of sq. 2. the conditional
 * lock/unlock of the txq. 3. making sparse happy. It is difficult for two
 * inline functions to solve these three problems at the same time perfectly.
 */
#define virtnet_xdp_get_sq(vi) ({ \
	int cpu = smp_processor_id(); \
	struct netdev_queue *txq; \
	typeof(vi) v = (vi); \
	unsigned int qp; \
	\
	if (v->curr_queue_pairs > nr_cpu_ids) { \
		qp = v->curr_queue_pairs - v->xdp_queue_pairs; \
		qp += cpu; \
		txq = netdev_get_tx_queue(v->dev, qp); \
		__netif_tx_acquire(txq); \
	} else { \
		qp = cpu % v->curr_queue_pairs; \
		txq = netdev_get_tx_queue(v->dev, qp); \
		__netif_tx_lock(txq, cpu); \
	} \
	v->sq + qp; \
})

#define virtnet_xdp_put_sq(vi, q) { \
	struct netdev_queue *txq; \
	typeof(vi) v = (vi); \
	\
	txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \
	if (v->curr_queue_pairs > nr_cpu_ids) \
		__netif_tx_release(txq); \
	else \
		__netif_tx_unlock(txq); \
}
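/* ndo_xdp_xmit: queue up to @n XDP frames on an XDP send queue. Completed
 * descriptors (skbs or previously sent XDP frames) are reclaimed first, and
 * the device is only kicked when XDP_XMIT_FLUSH is set. Returns the number of
 * frames actually queued.
 */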
static int virtnet_xdp_xmit(struct net_device *dev,
			    int n, struct xdp_frame **frames, u32 flags)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct receive_queue *rq = vi->rq;
	struct bpf_prog *xdp_prog;
	struct send_queue *sq;
	unsigned int len;
	int packets = 0;
	int bytes = 0;
	int nxmit = 0;
	int kicks = 0;
	void *ptr;
	int ret;
	int i;

	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
	 * indicates XDP resources have been successfully allocated.
	 */
	xdp_prog = rcu_access_pointer(rq->xdp_prog);
	if (!xdp_prog)
		return -ENXIO;

	sq = virtnet_xdp_get_sq(vi);

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
		ret = -EINVAL;
		goto out;
	}

	/* Free up any pending old buffers before queueing new ones. */
	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		if (likely(is_xdp_frame(ptr))) {
			struct xdp_frame *frame = ptr_to_xdp(ptr);

			bytes += xdp_get_frame_len(frame);
			xdp_return_frame(frame);
		} else {
			struct sk_buff *skb = ptr;

			bytes += skb->len;
			napi_consume_skb(skb, false);
		}
		packets++;
	}

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];

		if (__virtnet_xdp_xmit_one(vi, sq, xdpf))
			break;
		nxmit++;
	}
	ret = nxmit;

	if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
		check_sq_full_and_disable(vi, dev, sq);

	if (flags & XDP_XMIT_FLUSH) {
		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
			kicks = 1;
	}
out:
	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, bytes);
	u64_stats_add(&sq->stats.packets, packets);
	u64_stats_add(&sq->stats.xdp_tx, n);
	u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit);
	u64_stats_add(&sq->stats.kicks, kicks);
	u64_stats_update_end(&sq->stats.syncp);

	virtnet_xdp_put_sq(vi, sq);
	return ret;
}

static void put_xdp_frags(struct xdp_buff *xdp)
{
	struct skb_shared_info *shinfo;
	struct page *xdp_page;
	int i;

	if (xdp_buff_has_frags(xdp)) {
		shinfo = xdp_get_shared_info_from_buff(xdp);
		for (i = 0; i < shinfo->nr_frags; i++) {
			xdp_page = skb_frag_page(&shinfo->frags[i]);
			put_page(xdp_page);
		}
	}
}

static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
			       struct net_device *dev,
			       unsigned int *xdp_xmit,
			       struct virtnet_rq_stats *stats)
{
	struct xdp_frame *xdpf;
	int err;
	u32 act;

	act = bpf_prog_run_xdp(xdp_prog, xdp);
	u64_stats_inc(&stats->xdp_packets);

	switch (act) {
	case XDP_PASS:
		return act;

	case XDP_TX:
		u64_stats_inc(&stats->xdp_tx);
		xdpf = xdp_convert_buff_to_frame(xdp);
		if (unlikely(!xdpf)) {
			netdev_dbg(dev, "convert buff to frame failed for xdp\n");
			return XDP_DROP;
		}

		err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
		if (unlikely(!err)) {
			xdp_return_frame_rx_napi(xdpf);
		} else if (unlikely(err < 0)) {
			trace_xdp_exception(dev, xdp_prog, act);
			return XDP_DROP;
		}
		*xdp_xmit |= VIRTIO_XDP_TX;
		return act;

	case XDP_REDIRECT:
		u64_stats_inc(&stats->xdp_redirects);
		err = xdp_do_redirect(dev, xdp, xdp_prog);
		if (err)
			return XDP_DROP;

		*xdp_xmit |= VIRTIO_XDP_REDIR;
		return act;

	default:
		bpf_warn_invalid_xdp_action(dev, xdp_prog, act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(dev, xdp_prog, act);
		fallthrough;
	case XDP_DROP:
		return XDP_DROP;
	}
}

static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
{
	return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0;
}
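/* When XDP is enabled, every receive buffer reserves VIRTIO_XDP_HEADROOM
 * bytes in front of the packet so that bpf_xdp_adjust_head() has room to
 * grow the frame; without XDP no extra headroom is reserved.
 */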
/* We copy the packet for XDP in the following cases:
 *
 * 1) Packet is scattered across multiple rx buffers.
 * 2) Headroom space is insufficient.
 *
 * This is inefficient but it's a temporary condition that
 * we hit right after XDP is enabled and until queue is refilled
 * with large buffers with sufficient headroom - so it should affect
 * at most queue size packets.
 * Afterwards, the conditions to enable
 * XDP should preclude the underlying device from sending packets
 * across multiple buffers (num_buf > 1), and we make sure buffers
 * have enough headroom.
 */
static struct page *xdp_linearize_page(struct receive_queue *rq,
				       int *num_buf,
				       struct page *p,
				       int offset,
				       int page_off,
				       unsigned int *len)
{
	int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	struct page *page;

	if (page_off + *len + tailroom > PAGE_SIZE)
		return NULL;

	page = alloc_page(GFP_ATOMIC);
	if (!page)
		return NULL;

	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
	page_off += *len;

	while (--*num_buf) {
		unsigned int buflen;
		void *buf;
		int off;

		buf = virtnet_rq_get_buf(rq, &buflen, NULL);
		if (unlikely(!buf))
			goto err_buf;

		p = virt_to_head_page(buf);
		off = buf - page_address(p);

		/* guard against a misconfigured or uncooperative backend that
		 * is sending packet larger than the MTU.
		 */
		if ((page_off + buflen + tailroom) > PAGE_SIZE) {
			put_page(p);
			goto err_buf;
		}

		memcpy(page_address(page) + page_off,
		       page_address(p) + off, buflen);
		page_off += buflen;
		put_page(p);
	}

	/* Headroom does not contribute to packet length */
	*len = page_off - VIRTIO_XDP_HEADROOM;
	return page;
err_buf:
	__free_pages(page, 0);
	return NULL;
}

static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi,
					       unsigned int xdp_headroom,
					       void *buf,
					       unsigned int len)
{
	unsigned int header_offset;
	unsigned int headroom;
	unsigned int buflen;
	struct sk_buff *skb;

	header_offset = VIRTNET_RX_PAD + xdp_headroom;
	headroom = vi->hdr_len + header_offset;
	buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
		 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	skb = virtnet_build_skb(buf, buflen, headroom, len);
	if (unlikely(!skb))
		return NULL;

	buf += header_offset;
	memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len);

	return skb;
}

static struct sk_buff *receive_small_xdp(struct net_device *dev,
					 struct virtnet_info *vi,
					 struct receive_queue *rq,
					 struct bpf_prog *xdp_prog,
					 void *buf,
					 unsigned int xdp_headroom,
					 unsigned int len,
					 unsigned int *xdp_xmit,
					 struct virtnet_rq_stats *stats)
{
	unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
	unsigned int headroom = vi->hdr_len + header_offset;
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
	struct page *page = virt_to_head_page(buf);
	struct page *xdp_page;
	unsigned int buflen;
	struct xdp_buff xdp;
	struct sk_buff *skb;
	unsigned int metasize = 0;
	u32 act;

	if (unlikely(hdr->hdr.gso_type))
		goto err_xdp;

	buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
		 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
		int offset = buf - page_address(page) + header_offset;
		unsigned int tlen = len + vi->hdr_len;
		int num_buf = 1;

		xdp_headroom = virtnet_get_headroom(vi);
		header_offset = VIRTNET_RX_PAD + xdp_headroom;
		headroom = vi->hdr_len + header_offset;
		buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
		xdp_page = xdp_linearize_page(rq, &num_buf, page,
					      offset, header_offset,
					      &tlen);
		if (!xdp_page)
			goto err_xdp;

		buf = page_address(xdp_page);
		put_page(page);
		page = xdp_page;
	}

	xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
	xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
			 xdp_headroom, len, true);

	act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);

	switch (act) {
	case XDP_PASS:
		/* Recalculate length in case bpf program changed it */
		len = xdp.data_end - xdp.data;
		metasize = xdp.data - xdp.data_meta;
		break;

	case XDP_TX:
	case XDP_REDIRECT:
		goto xdp_xmit;

	default:
		goto err_xdp;
	}

	skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len);
	if (unlikely(!skb))
		goto err;

	if (metasize)
		skb_metadata_set(skb, metasize);

	return skb;

err_xdp:
	u64_stats_inc(&stats->xdp_drops);
err:
	u64_stats_inc(&stats->drops);
	put_page(page);
xdp_xmit:
	return NULL;
}

static struct sk_buff *receive_small(struct net_device *dev,
				     struct virtnet_info *vi,
				     struct receive_queue *rq,
				     void *buf, void *ctx,
				     unsigned int len,
				     unsigned int *xdp_xmit,
				     struct virtnet_rq_stats *stats)
{
	unsigned int xdp_headroom = (unsigned long)ctx;
	struct page *page = virt_to_head_page(buf);
	struct sk_buff *skb;

	len -= vi->hdr_len;
	u64_stats_add(&stats->bytes, len);

	if (unlikely(len > GOOD_PACKET_LEN)) {
		pr_debug("%s: rx error: len %u exceeds max size %d\n",
			 dev->name, len, GOOD_PACKET_LEN);
		DEV_STATS_INC(dev, rx_length_errors);
		goto err;
	}

	if (unlikely(vi->xdp_enabled)) {
		struct bpf_prog *xdp_prog;

		rcu_read_lock();
		xdp_prog = rcu_dereference(rq->xdp_prog);
		if (xdp_prog) {
			skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf,
						xdp_headroom, len, xdp_xmit,
						stats);
			rcu_read_unlock();
			return skb;
		}
		rcu_read_unlock();
	}

	skb = receive_small_build_skb(vi, xdp_headroom, buf, len);
	if (likely(skb))
		return skb;

err:
	u64_stats_inc(&stats->drops);
	put_page(page);
	return NULL;
}

static struct sk_buff *receive_big(struct net_device *dev,
				   struct virtnet_info *vi,
				   struct receive_queue *rq,
				   void *buf,
				   unsigned int len,
				   struct virtnet_rq_stats *stats)
{
	struct page *page = buf;
	struct sk_buff *skb =
		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);

	u64_stats_add(&stats->bytes, len - vi->hdr_len);
	if (unlikely(!skb))
		goto err;

	return skb;

err:
	u64_stats_inc(&stats->drops);
	give_pages(rq, page);
	return NULL;
}
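/* Drop the remaining buffers of a partially received mergeable packet: used
 * on error paths to drain the descriptors the device already filled for this
 * packet so that ring accounting stays correct.
 */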
static void mergeable_buf_free(struct receive_queue *rq, int num_buf,
			       struct net_device *dev,
			       struct virtnet_rq_stats *stats)
{
	struct page *page;
	void *buf;
	int len;

	while (num_buf-- > 1) {
		buf = virtnet_rq_get_buf(rq, &len, NULL);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers missing\n",
				 dev->name, num_buf);
			DEV_STATS_INC(dev, rx_length_errors);
			break;
		}
		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);
		put_page(page);
	}
}

/* Why not use xdp_build_skb_from_frame()?
 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in
 * virtio-net there are 2 points that do not match its requirements:
 * 1. The size of the prefilled buffer is not fixed before xdp is set.
 * 2. xdp_build_skb_from_frame() does more checks than we need,
 *    like eth_type_trans() (which virtio-net does in receive_buf()).
 */
static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev,
					       struct virtnet_info *vi,
					       struct xdp_buff *xdp,
					       unsigned int xdp_frags_truesz)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	unsigned int headroom, data_len;
	struct sk_buff *skb;
	int metasize;
	u8 nr_frags;

	if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
		pr_debug("Error building skb as missing reserved tailroom for xdp");
		return NULL;
	}

	if (unlikely(xdp_buff_has_frags(xdp)))
		nr_frags = sinfo->nr_frags;

	skb = build_skb(xdp->data_hard_start, xdp->frame_sz);
	if (unlikely(!skb))
		return NULL;

	headroom = xdp->data - xdp->data_hard_start;
	data_len = xdp->data_end - xdp->data;
	skb_reserve(skb, headroom);
	__skb_put(skb, data_len);

	metasize = xdp->data - xdp->data_meta;
	metasize = metasize > 0 ? metasize : 0;
	if (metasize)
		skb_metadata_set(skb, metasize);

	if (unlikely(xdp_buff_has_frags(xdp)))
		xdp_update_skb_shared_info(skb, nr_frags,
					   sinfo->xdp_frags_size,
					   xdp_frags_truesz,
					   xdp_buff_is_frag_pfmemalloc(xdp));

	return skb;
}

/* TODO: build xdp in big mode */
static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
				      struct virtnet_info *vi,
				      struct receive_queue *rq,
				      struct xdp_buff *xdp,
				      void *buf,
				      unsigned int len,
				      unsigned int frame_sz,
				      int *num_buf,
				      unsigned int *xdp_frags_truesize,
				      struct virtnet_rq_stats *stats)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	unsigned int headroom, tailroom, room;
	unsigned int truesize, cur_frag_size;
	struct skb_shared_info *shinfo;
	unsigned int xdp_frags_truesz = 0;
	struct page *page;
	skb_frag_t *frag;
	int offset;
	void *ctx;

	xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
	xdp_prepare_buff(xdp, buf - VIRTIO_XDP_HEADROOM,
			 VIRTIO_XDP_HEADROOM + vi->hdr_len, len - vi->hdr_len, true);

	if (!*num_buf)
		return 0;

	if (*num_buf > 1) {
		/* If we want to build a multi-buffer xdp_buff, we need
		 * to mark it by setting the XDP_FLAGS_HAS_FRAGS bit in
		 * the xdp_buff flags.
		 */
		if (!xdp_buff_has_frags(xdp))
			xdp_buff_set_frags_flag(xdp);

		shinfo = xdp_get_shared_info_from_buff(xdp);
		shinfo->nr_frags = 0;
		shinfo->xdp_frags_size = 0;
	}

	if (*num_buf > MAX_SKB_FRAGS + 1)
		return -EINVAL;

	while (--*num_buf > 0) {
		buf = virtnet_rq_get_buf(rq, &len, &ctx);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers out of %d missing\n",
				 dev->name, *num_buf,
				 virtio16_to_cpu(vi->vdev, hdr->num_buffers));
			DEV_STATS_INC(dev, rx_length_errors);
			goto err;
		}

		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);
		offset = buf - page_address(page);

		truesize = mergeable_ctx_to_truesize(ctx);
		headroom = mergeable_ctx_to_headroom(ctx);
		tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
		room = SKB_DATA_ALIGN(headroom + tailroom);

		cur_frag_size = truesize;
		xdp_frags_truesz += cur_frag_size;
		if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) {
			put_page(page);
			pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
				 dev->name, len, (unsigned long)(truesize - room));
			DEV_STATS_INC(dev, rx_length_errors);
			goto err;
		}

		frag = &shinfo->frags[shinfo->nr_frags++];
		skb_frag_fill_page_desc(frag, page, offset, len);
		if (page_is_pfmemalloc(page))
			xdp_buff_set_frag_pfmemalloc(xdp);

		shinfo->xdp_frags_size += len;
	}

	*xdp_frags_truesize = xdp_frags_truesz;
	return 0;

err:
	put_xdp_frags(xdp);
	return -EINVAL;
}

static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct bpf_prog *xdp_prog,
				   void *ctx,
				   unsigned int *frame_sz,
				   int *num_buf,
				   struct page **page,
				   int offset,
				   unsigned int *len,
				   struct virtio_net_hdr_mrg_rxbuf *hdr)
{
	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
	struct page *xdp_page;
	unsigned int xdp_room;

	/* Transient failure which in theory could occur if
	 * in-flight packets from before XDP was enabled reach
	 * the receive path after XDP is loaded.
	 */
	if (unlikely(hdr->hdr.gso_type))
		return NULL;

	/* The XDP core assumes the frag size is PAGE_SIZE, but buffers
	 * with headroom may add a hole in truesize, which
	 * makes their length exceed PAGE_SIZE. So we disable the
	 * hole mechanism for xdp. See add_recvbuf_mergeable().
	 */
	*frame_sz = truesize;

	if (likely(headroom >= virtnet_get_headroom(vi) &&
		   (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) {
		return page_address(*page) + offset;
	}

	/* This happens when headroom is not enough because
	 * the buffer was prefilled before XDP was set.
	 * This should only happen for the first several packets.
	 * In fact, vq reset can be used here to help us clean up
	 * the prefilled buffers, but many existing devices do not
	 * support it, and we don't want to bother users who are
	 * using xdp normally.
	 */
	if (!xdp_prog->aux->xdp_has_frags) {
		/* linearize data for XDP */
		xdp_page = xdp_linearize_page(rq, num_buf,
					      *page, offset,
					      VIRTIO_XDP_HEADROOM,
					      len);
		if (!xdp_page)
			return NULL;
	} else {
		xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM +
					  sizeof(struct skb_shared_info));
		if (*len + xdp_room > PAGE_SIZE)
			return NULL;

		xdp_page = alloc_page(GFP_ATOMIC);
		if (!xdp_page)
			return NULL;

		memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
		       page_address(*page) + offset, *len);
	}

	*frame_sz = PAGE_SIZE;

	put_page(*page);

	*page = xdp_page;

	return page_address(*page) + VIRTIO_XDP_HEADROOM;
}

static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
					     struct virtnet_info *vi,
					     struct receive_queue *rq,
					     struct bpf_prog *xdp_prog,
					     void *buf,
					     void *ctx,
					     unsigned int len,
					     unsigned int *xdp_xmit,
					     struct virtnet_rq_stats *stats)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
	struct page *page = virt_to_head_page(buf);
	int offset = buf - page_address(page);
	unsigned int xdp_frags_truesz = 0;
	struct sk_buff *head_skb;
	unsigned int frame_sz;
	struct xdp_buff xdp;
	void *data;
	u32 act;
	int err;

	data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page,
				     offset, &len, hdr);
	if (unlikely(!data))
		goto err_xdp;

	err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
					 &num_buf, &xdp_frags_truesz, stats);
	if (unlikely(err))
		goto err_xdp;

	act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);

	switch (act) {
	case XDP_PASS:
		head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
		if (unlikely(!head_skb))
			break;
		return head_skb;

	case XDP_TX:
	case XDP_REDIRECT:
		return NULL;

	default:
		break;
	}

	put_xdp_frags(&xdp);

err_xdp:
	put_page(page);
	mergeable_buf_free(rq, num_buf, dev, stats);

	u64_stats_inc(&stats->xdp_drops);
	u64_stats_inc(&stats->drops);
	return NULL;
}
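/* Receive path for mergeable buffers: the device may have split the packet
 * over num_buffers descriptors. The first buffer becomes the head skb and the
 * remaining buffers are pulled off the ring and attached as page frags (or as
 * chained skbs once MAX_SKB_FRAGS is reached).
 */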
static struct sk_buff *receive_mergeable(struct net_device *dev,
					 struct virtnet_info *vi,
					 struct receive_queue *rq,
					 void *buf,
					 void *ctx,
					 unsigned int len,
					 unsigned int *xdp_xmit,
					 struct virtnet_rq_stats *stats)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
	struct page *page = virt_to_head_page(buf);
	int offset = buf - page_address(page);
	struct sk_buff *head_skb, *curr_skb;
	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);

	head_skb = NULL;
	u64_stats_add(&stats->bytes, len - vi->hdr_len);

	if (unlikely(len > truesize - room)) {
		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
			 dev->name, len, (unsigned long)(truesize - room));
		DEV_STATS_INC(dev, rx_length_errors);
		goto err_skb;
	}

	if (unlikely(vi->xdp_enabled)) {
		struct bpf_prog *xdp_prog;

		rcu_read_lock();
		xdp_prog = rcu_dereference(rq->xdp_prog);
		if (xdp_prog) {
			head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx,
							 len, xdp_xmit, stats);
			rcu_read_unlock();
			return head_skb;
		}
		rcu_read_unlock();
	}

	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom);
	curr_skb = head_skb;

	if (unlikely(!curr_skb))
		goto err_skb;
	while (--num_buf) {
		int num_skb_frags;

		buf = virtnet_rq_get_buf(rq, &len, &ctx);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers out of %d missing\n",
				 dev->name, num_buf,
				 virtio16_to_cpu(vi->vdev,
						 hdr->num_buffers));
			DEV_STATS_INC(dev, rx_length_errors);
			goto err_buf;
		}

		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);

		truesize = mergeable_ctx_to_truesize(ctx);
		headroom = mergeable_ctx_to_headroom(ctx);
		tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
		room = SKB_DATA_ALIGN(headroom + tailroom);
		if (unlikely(len > truesize - room)) {
			pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
				 dev->name, len, (unsigned long)(truesize - room));
			DEV_STATS_INC(dev, rx_length_errors);
			goto err_skb;
		}

		num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
		if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
			struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);

			if (unlikely(!nskb))
				goto err_skb;
			if (curr_skb == head_skb)
				skb_shinfo(curr_skb)->frag_list = nskb;
			else
				curr_skb->next = nskb;
			curr_skb = nskb;
			head_skb->truesize += nskb->truesize;
			num_skb_frags = 0;
		}
		if (curr_skb != head_skb) {
			head_skb->data_len += len;
			head_skb->len += len;
			head_skb->truesize += truesize;
		}
		offset = buf - page_address(page);
		if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
			put_page(page);
			skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
					     len, truesize);
		} else {
			skb_add_rx_frag(curr_skb, num_skb_frags, page,
					offset, len, truesize);
		}
	}

	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
	return head_skb;

err_skb:
	put_page(page);
	mergeable_buf_free(rq, num_buf, dev, stats);

err_buf:
	u64_stats_inc(&stats->drops);
	dev_kfree_skb(head_skb);
	return NULL;
}

static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash,
				struct sk_buff *skb)
{
	enum pkt_hash_types rss_hash_type;

	if (!hdr_hash || !skb)
		return;

	switch (__le16_to_cpu(hdr_hash->hash_report)) {
	case VIRTIO_NET_HASH_REPORT_TCPv4:
	case VIRTIO_NET_HASH_REPORT_UDPv4:
	case VIRTIO_NET_HASH_REPORT_TCPv6:
	case VIRTIO_NET_HASH_REPORT_UDPv6:
	case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
	case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
		rss_hash_type = PKT_HASH_TYPE_L4;
		break;
	case VIRTIO_NET_HASH_REPORT_IPv4:
	case VIRTIO_NET_HASH_REPORT_IPv6:
	case VIRTIO_NET_HASH_REPORT_IPv6_EX:
		rss_hash_type = PKT_HASH_TYPE_L3;
		break;
	case VIRTIO_NET_HASH_REPORT_NONE:
	default:
		rss_hash_type = PKT_HASH_TYPE_NONE;
	}
	skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type);
}

static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
			void *buf, unsigned int len, void **ctx,
			unsigned int *xdp_xmit,
			struct virtnet_rq_stats *stats)
{
	struct net_device *dev = vi->dev;
	struct sk_buff *skb;
	struct virtio_net_common_hdr *hdr;

	if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
		pr_debug("%s: short packet %i\n", dev->name, len);
		DEV_STATS_INC(dev, rx_length_errors);
		virtnet_rq_free_buf(vi, rq, buf);
		return;
	}

	if (vi->mergeable_rx_bufs)
		skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
					stats);
	else if (vi->big_packets)
		skb = receive_big(dev, vi, rq, buf, len, stats);
	else
		skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);

	if (unlikely(!skb))
		return;

	hdr = skb_vnet_common_hdr(skb);
	if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report)
		virtio_skb_set_hash(&hdr->hash_v1_hdr, skb);

	if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
				  virtio_is_little_endian(vi->vdev))) {
		net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
				     dev->name, hdr->hdr.gso_type,
				     hdr->hdr.gso_size);
		goto frame_err;
	}

	skb_record_rx_queue(skb, vq2rxq(rq->vq));
	skb->protocol = eth_type_trans(skb, dev);
	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
		 ntohs(skb->protocol), skb->len, skb->pkt_type);

	napi_gro_receive(&rq->napi, skb);
	return;

frame_err:
	DEV_STATS_INC(dev, rx_frame_errors);
	dev_kfree_skb(skb);
}
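/* Layout of a small-mode receive buffer inside the page frag (roughly):
 *
 *   [ VIRTNET_RX_PAD + xdp_headroom | virtio_net hdr |
 *     up to GOOD_PACKET_LEN of packet data | skb_shared_info ]
 *
 * Only the region starting at the virtio header is handed to the device; the
 * leading pad/headroom and the trailing shared_info space are reserved for
 * the stack and XDP.
 */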
/* Unlike mergeable buffers, all buffers are allocated with the
 * same size, except for the headroom. For this reason we do
 * not need to use mergeable_len_to_ctx here - it is enough
 * to store the headroom as the context, ignoring the truesize.
 */
static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
			     gfp_t gfp)
{
	char *buf;
	unsigned int xdp_headroom = virtnet_get_headroom(vi);
	void *ctx = (void *)(unsigned long)xdp_headroom;
	int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
	int err;

	len = SKB_DATA_ALIGN(len) +
	      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	buf = virtnet_rq_alloc(rq, len, gfp);
	if (unlikely(!buf))
		return -ENOMEM;

	virtnet_rq_init_one_sg(rq, buf + VIRTNET_RX_PAD + xdp_headroom,
			       vi->hdr_len + GOOD_PACKET_LEN);

	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
	if (err < 0) {
		if (rq->do_dma)
			virtnet_rq_unmap(rq, buf, 0);
		put_page(virt_to_head_page(buf));
	}

	return err;
}

static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
			   gfp_t gfp)
{
	struct page *first, *list = NULL;
	char *p;
	int i, err, offset;

	sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2);

	/* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */
	for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) {
		first = get_a_page(rq, gfp);
		if (!first) {
			if (list)
				give_pages(rq, list);
			return -ENOMEM;
		}
		sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);

		/* chain new page in list head to match sg */
		first->private = (unsigned long)list;
		list = first;
	}

	first = get_a_page(rq, gfp);
	if (!first) {
		give_pages(rq, list);
		return -ENOMEM;
	}
	p = page_address(first);

	/* rq->sg[0], rq->sg[1] share the same page */
	/* a separate rq->sg[0] for the header - required in case !any_header_sg */
	sg_set_buf(&rq->sg[0], p, vi->hdr_len);

	/* rq->sg[1] for data packet, from offset */
	offset = sizeof(struct padded_vnet_hdr);
	sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);

	/* chain first in list head */
	first->private = (unsigned long)list;
	err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2,
				  first, gfp);
	if (err < 0)
		give_pages(rq, first);

	return err;
}

static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
					  struct ewma_pkt_len *avg_pkt_len,
					  unsigned int room)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	const size_t hdr_len = vi->hdr_len;
	unsigned int len;

	if (room)
		return PAGE_SIZE - room;

	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
				rq->min_buf_len, PAGE_SIZE - hdr_len);

	return ALIGN(len, L1_CACHE_BYTES);
}

static int add_recvbuf_mergeable(struct virtnet_info *vi,
				 struct receive_queue *rq, gfp_t gfp)
{
	struct page_frag *alloc_frag = &rq->alloc_frag;
	unsigned int headroom = virtnet_get_headroom(vi);
	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
	unsigned int len, hole;
	void *ctx;
	char *buf;
	int err;

	/* Extra tailroom is needed to satisfy XDP's assumption. This
	 * means rx frags coalescing won't work, but considering we've
	 * disabled GSO for XDP, it won't be a big issue.
	 */
	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);

	buf = virtnet_rq_alloc(rq, len + room, gfp);
	if (unlikely(!buf))
		return -ENOMEM;

	buf += headroom; /* advance address leaving hole at front of pkt */
	hole = alloc_frag->size - alloc_frag->offset;
	if (hole < len + room) {
		/* To avoid internal fragmentation, if there is very likely not
		 * enough space for another buffer, add the remaining space to
		 * the current buffer.
		 * XDP core assumes that frame_size of xdp_buff and the length
		 * of the frag are PAGE_SIZE, so we disable the hole mechanism.
		 */
		if (!headroom)
			len += hole;
		alloc_frag->offset += hole;
	}

	virtnet_rq_init_one_sg(rq, buf, len);

	ctx = mergeable_len_to_ctx(len + room, headroom);
	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
	if (err < 0) {
		if (rq->do_dma)
			virtnet_rq_unmap(rq, buf, 0);
		put_page(virt_to_head_page(buf));
	}

	return err;
}

/*
 * Returns false if we couldn't fill entirely (OOM).
 *
 * Normally run in the receive path, but can also be run from ndo_open
 * before we're receiving packets, or from refill_work which is
 * careful to disable receiving (using napi_disable).
 */
static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
			  gfp_t gfp)
{
	int err;
	bool oom;

	do {
		if (vi->mergeable_rx_bufs)
			err = add_recvbuf_mergeable(vi, rq, gfp);
		else if (vi->big_packets)
			err = add_recvbuf_big(vi, rq, gfp);
		else
			err = add_recvbuf_small(vi, rq, gfp);

		oom = err == -ENOMEM;
		if (err)
			break;
	} while (rq->vq->num_free);
	if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
		unsigned long flags;

		flags = u64_stats_update_begin_irqsave(&rq->stats.syncp);
		u64_stats_inc(&rq->stats.kicks);
		u64_stats_update_end_irqrestore(&rq->stats.syncp, flags);
	}

	return !oom;
}

static void skb_recv_done(struct virtqueue *rvq)
{
	struct virtnet_info *vi = rvq->vdev->priv;
	struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];

	virtqueue_napi_schedule(&rq->napi, rvq);
}

static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
{
	napi_enable(napi);

	/* If all buffers were filled by other side before we napi_enabled, we
	 * won't get another interrupt, so process any outstanding packets now.
	 * Call local_bh_enable after to trigger softIRQ processing.
	 */
	local_bh_disable();
	virtqueue_napi_schedule(napi, vq);
	local_bh_enable();
}
2038 */ 2039 if (!vi->affinity_hint_set) { 2040 napi->weight = 0; 2041 return; 2042 } 2043 2044 return virtnet_napi_enable(vq, napi); 2045 } 2046 2047 static void virtnet_napi_tx_disable(struct napi_struct *napi) 2048 { 2049 if (napi->weight) 2050 napi_disable(napi); 2051 } 2052 2053 static void refill_work(struct work_struct *work) 2054 { 2055 struct virtnet_info *vi = 2056 container_of(work, struct virtnet_info, refill.work); 2057 bool still_empty; 2058 int i; 2059 2060 for (i = 0; i < vi->curr_queue_pairs; i++) { 2061 struct receive_queue *rq = &vi->rq[i]; 2062 2063 napi_disable(&rq->napi); 2064 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); 2065 virtnet_napi_enable(rq->vq, &rq->napi); 2066 2067 /* In theory, this can happen: if we don't get any buffers in 2068 * we will *never* try to fill again. 2069 */ 2070 if (still_empty) 2071 schedule_delayed_work(&vi->refill, HZ/2); 2072 } 2073 } 2074 2075 static int virtnet_receive(struct receive_queue *rq, int budget, 2076 unsigned int *xdp_xmit) 2077 { 2078 struct virtnet_info *vi = rq->vq->vdev->priv; 2079 struct virtnet_rq_stats stats = {}; 2080 unsigned int len; 2081 int packets = 0; 2082 void *buf; 2083 int i; 2084 2085 if (!vi->big_packets || vi->mergeable_rx_bufs) { 2086 void *ctx; 2087 2088 while (packets < budget && 2089 (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { 2090 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats); 2091 packets++; 2092 } 2093 } else { 2094 while (packets < budget && 2095 (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) { 2096 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats); 2097 packets++; 2098 } 2099 } 2100 2101 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 2102 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { 2103 spin_lock(&vi->refill_lock); 2104 if (vi->refill_enabled) 2105 schedule_delayed_work(&vi->refill, 0); 2106 spin_unlock(&vi->refill_lock); 2107 } 2108 } 2109 2110 u64_stats_set(&stats.packets, packets); 2111 u64_stats_update_begin(&rq->stats.syncp); 2112 for (i = 0; i < VIRTNET_RQ_STATS_LEN; i++) { 2113 size_t offset = virtnet_rq_stats_desc[i].offset; 2114 u64_stats_t *item, *src; 2115 2116 item = (u64_stats_t *)((u8 *)&rq->stats + offset); 2117 src = (u64_stats_t *)((u8 *)&stats + offset); 2118 u64_stats_add(item, u64_stats_read(src)); 2119 } 2120 u64_stats_update_end(&rq->stats.syncp); 2121 2122 return packets; 2123 } 2124 2125 static void virtnet_poll_cleantx(struct receive_queue *rq) 2126 { 2127 struct virtnet_info *vi = rq->vq->vdev->priv; 2128 unsigned int index = vq2rxq(rq->vq); 2129 struct send_queue *sq = &vi->sq[index]; 2130 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 2131 2132 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 2133 return; 2134 2135 if (__netif_tx_trylock(txq)) { 2136 if (sq->reset) { 2137 __netif_tx_unlock(txq); 2138 return; 2139 } 2140 2141 do { 2142 virtqueue_disable_cb(sq->vq); 2143 free_old_xmit_skbs(sq, true); 2144 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 2145 2146 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) 2147 netif_tx_wake_queue(txq); 2148 2149 __netif_tx_unlock(txq); 2150 } 2151 } 2152 2153 static int virtnet_poll(struct napi_struct *napi, int budget) 2154 { 2155 struct receive_queue *rq = 2156 container_of(napi, struct receive_queue, napi); 2157 struct virtnet_info *vi = rq->vq->vdev->priv; 2158 struct send_queue *sq; 2159 unsigned int received; 2160 unsigned int xdp_xmit = 0; 2161 2162 virtnet_poll_cleantx(rq); 2163 2164 received = virtnet_receive(rq, budget, 
&xdp_xmit); 2165 2166 if (xdp_xmit & VIRTIO_XDP_REDIR) 2167 xdp_do_flush(); 2168 2169 /* Out of packets? */ 2170 if (received < budget) 2171 virtqueue_napi_complete(napi, rq->vq, received); 2172 2173 if (xdp_xmit & VIRTIO_XDP_TX) { 2174 sq = virtnet_xdp_get_sq(vi); 2175 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 2176 u64_stats_update_begin(&sq->stats.syncp); 2177 u64_stats_inc(&sq->stats.kicks); 2178 u64_stats_update_end(&sq->stats.syncp); 2179 } 2180 virtnet_xdp_put_sq(vi, sq); 2181 } 2182 2183 return received; 2184 } 2185 2186 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) 2187 { 2188 virtnet_napi_tx_disable(&vi->sq[qp_index].napi); 2189 napi_disable(&vi->rq[qp_index].napi); 2190 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 2191 } 2192 2193 static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) 2194 { 2195 struct net_device *dev = vi->dev; 2196 int err; 2197 2198 err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, 2199 vi->rq[qp_index].napi.napi_id); 2200 if (err < 0) 2201 return err; 2202 2203 err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, 2204 MEM_TYPE_PAGE_SHARED, NULL); 2205 if (err < 0) 2206 goto err_xdp_reg_mem_model; 2207 2208 virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); 2209 virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); 2210 2211 return 0; 2212 2213 err_xdp_reg_mem_model: 2214 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 2215 return err; 2216 } 2217 2218 static int virtnet_open(struct net_device *dev) 2219 { 2220 struct virtnet_info *vi = netdev_priv(dev); 2221 int i, err; 2222 2223 enable_delayed_refill(vi); 2224 2225 for (i = 0; i < vi->max_queue_pairs; i++) { 2226 if (i < vi->curr_queue_pairs) 2227 /* Make sure we have some buffers: if oom use wq. 
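 *
 * (A failed GFP_KERNEL fill here is not fatal: scheduling vi->refill
 * defers to refill_work(), which retries with NAPI disabled and re-arms
 * itself every HZ/2 until the ring can be filled.)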
*/ 2228 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 2229 schedule_delayed_work(&vi->refill, 0); 2230 2231 err = virtnet_enable_queue_pair(vi, i); 2232 if (err < 0) 2233 goto err_enable_qp; 2234 } 2235 2236 return 0; 2237 2238 err_enable_qp: 2239 disable_delayed_refill(vi); 2240 cancel_delayed_work_sync(&vi->refill); 2241 2242 for (i--; i >= 0; i--) 2243 virtnet_disable_queue_pair(vi, i); 2244 return err; 2245 } 2246 2247 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 2248 { 2249 struct send_queue *sq = container_of(napi, struct send_queue, napi); 2250 struct virtnet_info *vi = sq->vq->vdev->priv; 2251 unsigned int index = vq2txq(sq->vq); 2252 struct netdev_queue *txq; 2253 int opaque; 2254 bool done; 2255 2256 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 2257 /* We don't need to enable cb for XDP */ 2258 napi_complete_done(napi, 0); 2259 return 0; 2260 } 2261 2262 txq = netdev_get_tx_queue(vi->dev, index); 2263 __netif_tx_lock(txq, raw_smp_processor_id()); 2264 virtqueue_disable_cb(sq->vq); 2265 free_old_xmit_skbs(sq, true); 2266 2267 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) 2268 netif_tx_wake_queue(txq); 2269 2270 opaque = virtqueue_enable_cb_prepare(sq->vq); 2271 2272 done = napi_complete_done(napi, 0); 2273 2274 if (!done) 2275 virtqueue_disable_cb(sq->vq); 2276 2277 __netif_tx_unlock(txq); 2278 2279 if (done) { 2280 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 2281 if (napi_schedule_prep(napi)) { 2282 __netif_tx_lock(txq, raw_smp_processor_id()); 2283 virtqueue_disable_cb(sq->vq); 2284 __netif_tx_unlock(txq); 2285 __napi_schedule(napi); 2286 } 2287 } 2288 } 2289 2290 return 0; 2291 } 2292 2293 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) 2294 { 2295 struct virtio_net_hdr_mrg_rxbuf *hdr; 2296 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 2297 struct virtnet_info *vi = sq->vq->vdev->priv; 2298 int num_sg; 2299 unsigned hdr_len = vi->hdr_len; 2300 bool can_push; 2301 2302 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 2303 2304 can_push = vi->any_header_sg && 2305 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 2306 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 2307 /* Even if we can, don't push here yet as this would skew 2308 * csum_start offset below. */ 2309 if (can_push) 2310 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); 2311 else 2312 hdr = &skb_vnet_common_hdr(skb)->mrg_hdr; 2313 2314 if (virtio_net_hdr_from_skb(skb, &hdr->hdr, 2315 virtio_is_little_endian(vi->vdev), false, 2316 0)) 2317 return -EPROTO; 2318 2319 if (vi->mergeable_rx_bufs) 2320 hdr->num_buffers = 0; 2321 2322 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 2323 if (can_push) { 2324 __skb_push(skb, hdr_len); 2325 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 2326 if (unlikely(num_sg < 0)) 2327 return num_sg; 2328 /* Pull header back to avoid skew in tx bytes calculations. 
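 *
 * (The earlier __skb_push() only exists so skb_to_sgvec() can map the
 * virtio header and the packet as one linear run; pulling it back keeps
 * skb->len equal to the on-wire length for the byte counters.)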
 */
2329 __skb_pull(skb, hdr_len);
2330 } else {
2331 sg_set_buf(sq->sg, hdr, hdr_len);
2332 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
2333 if (unlikely(num_sg < 0))
2334 return num_sg;
2335 num_sg++;
2336 }
2337 return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
2338 }
2339
2340 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
2341 {
2342 struct virtnet_info *vi = netdev_priv(dev);
2343 int qnum = skb_get_queue_mapping(skb);
2344 struct send_queue *sq = &vi->sq[qnum];
2345 int err;
2346 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
2347 bool kick = !netdev_xmit_more();
2348 bool use_napi = sq->napi.weight;
2349
2350 /* Free up any pending old buffers before queueing new ones. */
2351 do {
2352 if (use_napi)
2353 virtqueue_disable_cb(sq->vq);
2354
2355 free_old_xmit_skbs(sq, false);
2356
2357 } while (use_napi && kick &&
2358 unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
2359
2360 /* timestamp packet in software */
2361 skb_tx_timestamp(skb);
2362
2363 /* Try to transmit */
2364 err = xmit_skb(sq, skb);
2365
2366 /* This should not happen! */
2367 if (unlikely(err)) {
2368 DEV_STATS_INC(dev, tx_fifo_errors);
2369 if (net_ratelimit())
2370 dev_warn(&dev->dev,
2371 "Unexpected TXQ (%d) queue failure: %d\n",
2372 qnum, err);
2373 DEV_STATS_INC(dev, tx_dropped);
2374 dev_kfree_skb_any(skb);
2375 return NETDEV_TX_OK;
2376 }
2377
2378 /* Don't wait up for transmitted skbs to be freed. */
2379 if (!use_napi) {
2380 skb_orphan(skb);
2381 nf_reset_ct(skb);
2382 }
2383
2384 check_sq_full_and_disable(vi, dev, sq);
2385
2386 if (kick || netif_xmit_stopped(txq)) {
2387 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
2388 u64_stats_update_begin(&sq->stats.syncp);
2389 u64_stats_inc(&sq->stats.kicks);
2390 u64_stats_update_end(&sq->stats.syncp);
2391 }
2392 }
2393
2394 return NETDEV_TX_OK;
2395 }
2396
2397 static int virtnet_rx_resize(struct virtnet_info *vi,
2398 struct receive_queue *rq, u32 ring_num)
2399 {
2400 bool running = netif_running(vi->dev);
2401 int err, qindex;
2402
2403 qindex = rq - vi->rq;
2404
2405 if (running)
2406 napi_disable(&rq->napi);
2407
2408 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf);
2409 if (err)
2410 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err);
2411
2412 if (!try_fill_recv(vi, rq, GFP_KERNEL))
2413 schedule_delayed_work(&vi->refill, 0);
2414
2415 if (running)
2416 virtnet_napi_enable(rq->vq, &rq->napi);
2417 return err;
2418 }
2419
2420 static int virtnet_tx_resize(struct virtnet_info *vi,
2421 struct send_queue *sq, u32 ring_num)
2422 {
2423 bool running = netif_running(vi->dev);
2424 struct netdev_queue *txq;
2425 int err, qindex;
2426
2427 qindex = sq - vi->sq;
2428
2429 if (running)
2430 virtnet_napi_tx_disable(&sq->napi);
2431
2432 txq = netdev_get_tx_queue(vi->dev, qindex);
2433
2434 /* 1. wait for all in-flight xmit to complete
2435 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue()
2436 */
2437 __netif_tx_lock_bh(txq);
2438
2439 /* Prevent rx poll from accessing sq. */
2440 sq->reset = true;
2441
2442 /* Prevent the upper layer from trying to send packets.
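 *
 * (Resize sequence, as implemented below: mark sq->reset and stop the
 * subqueue under the tx lock so virtnet_poll_cleantx() backs off, call
 * virtqueue_resize(), then clear sq->reset and wake the queue again.)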
*/ 2443 netif_stop_subqueue(vi->dev, qindex); 2444 2445 __netif_tx_unlock_bh(txq); 2446 2447 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf); 2448 if (err) 2449 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 2450 2451 __netif_tx_lock_bh(txq); 2452 sq->reset = false; 2453 netif_tx_wake_queue(txq); 2454 __netif_tx_unlock_bh(txq); 2455 2456 if (running) 2457 virtnet_napi_tx_enable(vi, sq->vq, &sq->napi); 2458 return err; 2459 } 2460 2461 /* 2462 * Send command via the control virtqueue and check status. Commands 2463 * supported by the hypervisor, as indicated by feature bits, should 2464 * never fail unless improperly formatted. 2465 */ 2466 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 2467 struct scatterlist *out) 2468 { 2469 struct scatterlist *sgs[4], hdr, stat; 2470 unsigned out_num = 0, tmp; 2471 int ret; 2472 2473 /* Caller should know better */ 2474 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 2475 2476 vi->ctrl->status = ~0; 2477 vi->ctrl->hdr.class = class; 2478 vi->ctrl->hdr.cmd = cmd; 2479 /* Add header */ 2480 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 2481 sgs[out_num++] = &hdr; 2482 2483 if (out) 2484 sgs[out_num++] = out; 2485 2486 /* Add return status. */ 2487 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 2488 sgs[out_num] = &stat; 2489 2490 BUG_ON(out_num + 1 > ARRAY_SIZE(sgs)); 2491 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC); 2492 if (ret < 0) { 2493 dev_warn(&vi->vdev->dev, 2494 "Failed to add sgs for command vq: %d\n.", ret); 2495 return false; 2496 } 2497 2498 if (unlikely(!virtqueue_kick(vi->cvq))) 2499 return vi->ctrl->status == VIRTIO_NET_OK; 2500 2501 /* Spin for a response, the kick causes an ioport write, trapping 2502 * into the hypervisor, so the request should be handled immediately. 2503 */ 2504 while (!virtqueue_get_buf(vi->cvq, &tmp) && 2505 !virtqueue_is_broken(vi->cvq)) 2506 cpu_relax(); 2507 2508 return vi->ctrl->status == VIRTIO_NET_OK; 2509 } 2510 2511 static int virtnet_set_mac_address(struct net_device *dev, void *p) 2512 { 2513 struct virtnet_info *vi = netdev_priv(dev); 2514 struct virtio_device *vdev = vi->vdev; 2515 int ret; 2516 struct sockaddr *addr; 2517 struct scatterlist sg; 2518 2519 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 2520 return -EOPNOTSUPP; 2521 2522 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 2523 if (!addr) 2524 return -ENOMEM; 2525 2526 ret = eth_prepare_mac_addr_change(dev, addr); 2527 if (ret) 2528 goto out; 2529 2530 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 2531 sg_init_one(&sg, addr->sa_data, dev->addr_len); 2532 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 2533 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 2534 dev_warn(&vdev->dev, 2535 "Failed to set mac address by vq command.\n"); 2536 ret = -EINVAL; 2537 goto out; 2538 } 2539 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 2540 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 2541 unsigned int i; 2542 2543 /* Naturally, this has an atomicity problem. 
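 *
 * (Without VIRTIO_NET_F_CTRL_MAC_ADDR the address can only be written one
 * config-space byte at a time with virtio_cwrite8(), so the device may
 * briefly observe a mix of the old and new MAC while this loop runs.)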
*/ 2544 for (i = 0; i < dev->addr_len; i++) 2545 virtio_cwrite8(vdev, 2546 offsetof(struct virtio_net_config, mac) + 2547 i, addr->sa_data[i]); 2548 } 2549 2550 eth_commit_mac_addr_change(dev, p); 2551 ret = 0; 2552 2553 out: 2554 kfree(addr); 2555 return ret; 2556 } 2557 2558 static void virtnet_stats(struct net_device *dev, 2559 struct rtnl_link_stats64 *tot) 2560 { 2561 struct virtnet_info *vi = netdev_priv(dev); 2562 unsigned int start; 2563 int i; 2564 2565 for (i = 0; i < vi->max_queue_pairs; i++) { 2566 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 2567 struct receive_queue *rq = &vi->rq[i]; 2568 struct send_queue *sq = &vi->sq[i]; 2569 2570 do { 2571 start = u64_stats_fetch_begin(&sq->stats.syncp); 2572 tpackets = u64_stats_read(&sq->stats.packets); 2573 tbytes = u64_stats_read(&sq->stats.bytes); 2574 terrors = u64_stats_read(&sq->stats.tx_timeouts); 2575 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 2576 2577 do { 2578 start = u64_stats_fetch_begin(&rq->stats.syncp); 2579 rpackets = u64_stats_read(&rq->stats.packets); 2580 rbytes = u64_stats_read(&rq->stats.bytes); 2581 rdrops = u64_stats_read(&rq->stats.drops); 2582 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 2583 2584 tot->rx_packets += rpackets; 2585 tot->tx_packets += tpackets; 2586 tot->rx_bytes += rbytes; 2587 tot->tx_bytes += tbytes; 2588 tot->rx_dropped += rdrops; 2589 tot->tx_errors += terrors; 2590 } 2591 2592 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); 2593 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); 2594 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); 2595 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); 2596 } 2597 2598 static void virtnet_ack_link_announce(struct virtnet_info *vi) 2599 { 2600 rtnl_lock(); 2601 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 2602 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 2603 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 2604 rtnl_unlock(); 2605 } 2606 2607 static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 2608 { 2609 struct scatterlist sg; 2610 struct net_device *dev = vi->dev; 2611 2612 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) 2613 return 0; 2614 2615 vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); 2616 sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq)); 2617 2618 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 2619 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { 2620 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", 2621 queue_pairs); 2622 return -EINVAL; 2623 } else { 2624 vi->curr_queue_pairs = queue_pairs; 2625 /* virtnet_open() will refill when device is going to up. */ 2626 if (dev->flags & IFF_UP) 2627 schedule_delayed_work(&vi->refill, 0); 2628 } 2629 2630 return 0; 2631 } 2632 2633 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 2634 { 2635 int err; 2636 2637 rtnl_lock(); 2638 err = _virtnet_set_queues(vi, queue_pairs); 2639 rtnl_unlock(); 2640 return err; 2641 } 2642 2643 static int virtnet_close(struct net_device *dev) 2644 { 2645 struct virtnet_info *vi = netdev_priv(dev); 2646 int i; 2647 2648 /* Make sure NAPI doesn't schedule refill work */ 2649 disable_delayed_refill(vi); 2650 /* Make sure refill_work doesn't re-enable napi! 
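 *
 * (Ordering matters here: delayed refill was disabled above so nothing new
 * gets queued, and the synchronous cancel below waits out any refill_work()
 * already running, since that work calls virtnet_napi_enable() and must not
 * race with the per-queue disables that follow.)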
*/ 2651 cancel_delayed_work_sync(&vi->refill); 2652 2653 for (i = 0; i < vi->max_queue_pairs; i++) 2654 virtnet_disable_queue_pair(vi, i); 2655 2656 return 0; 2657 } 2658 2659 static void virtnet_set_rx_mode(struct net_device *dev) 2660 { 2661 struct virtnet_info *vi = netdev_priv(dev); 2662 struct scatterlist sg[2]; 2663 struct virtio_net_ctrl_mac *mac_data; 2664 struct netdev_hw_addr *ha; 2665 int uc_count; 2666 int mc_count; 2667 void *buf; 2668 int i; 2669 2670 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 2671 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 2672 return; 2673 2674 vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0); 2675 vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0); 2676 2677 sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc)); 2678 2679 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 2680 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 2681 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 2682 vi->ctrl->promisc ? "en" : "dis"); 2683 2684 sg_init_one(sg, &vi->ctrl->allmulti, sizeof(vi->ctrl->allmulti)); 2685 2686 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 2687 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 2688 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 2689 vi->ctrl->allmulti ? "en" : "dis"); 2690 2691 uc_count = netdev_uc_count(dev); 2692 mc_count = netdev_mc_count(dev); 2693 /* MAC filter - use one buffer for both lists */ 2694 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 2695 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 2696 mac_data = buf; 2697 if (!buf) 2698 return; 2699 2700 sg_init_table(sg, 2); 2701 2702 /* Store the unicast list and count in the front of the buffer */ 2703 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 2704 i = 0; 2705 netdev_for_each_uc_addr(ha, dev) 2706 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 2707 2708 sg_set_buf(&sg[0], mac_data, 2709 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 2710 2711 /* multicast list and count fill the end */ 2712 mac_data = (void *)&mac_data->macs[uc_count][0]; 2713 2714 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 2715 i = 0; 2716 netdev_for_each_mc_addr(ha, dev) 2717 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 2718 2719 sg_set_buf(&sg[1], mac_data, 2720 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 2721 2722 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 2723 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 2724 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 2725 2726 kfree(buf); 2727 } 2728 2729 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 2730 __be16 proto, u16 vid) 2731 { 2732 struct virtnet_info *vi = netdev_priv(dev); 2733 struct scatterlist sg; 2734 2735 vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid); 2736 sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); 2737 2738 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 2739 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 2740 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 2741 return 0; 2742 } 2743 2744 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 2745 __be16 proto, u16 vid) 2746 { 2747 struct virtnet_info *vi = netdev_priv(dev); 2748 struct scatterlist sg; 2749 2750 vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid); 2751 sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); 2752 2753 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 2754 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 2755 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); 2756 return 0; 2757 } 2758 2759 static void virtnet_clean_affinity(struct 
virtnet_info *vi) 2760 { 2761 int i; 2762 2763 if (vi->affinity_hint_set) { 2764 for (i = 0; i < vi->max_queue_pairs; i++) { 2765 virtqueue_set_affinity(vi->rq[i].vq, NULL); 2766 virtqueue_set_affinity(vi->sq[i].vq, NULL); 2767 } 2768 2769 vi->affinity_hint_set = false; 2770 } 2771 } 2772 2773 static void virtnet_set_affinity(struct virtnet_info *vi) 2774 { 2775 cpumask_var_t mask; 2776 int stragglers; 2777 int group_size; 2778 int i, j, cpu; 2779 int num_cpu; 2780 int stride; 2781 2782 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 2783 virtnet_clean_affinity(vi); 2784 return; 2785 } 2786 2787 num_cpu = num_online_cpus(); 2788 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); 2789 stragglers = num_cpu >= vi->curr_queue_pairs ? 2790 num_cpu % vi->curr_queue_pairs : 2791 0; 2792 cpu = cpumask_first(cpu_online_mask); 2793 2794 for (i = 0; i < vi->curr_queue_pairs; i++) { 2795 group_size = stride + (i < stragglers ? 1 : 0); 2796 2797 for (j = 0; j < group_size; j++) { 2798 cpumask_set_cpu(cpu, mask); 2799 cpu = cpumask_next_wrap(cpu, cpu_online_mask, 2800 nr_cpu_ids, false); 2801 } 2802 virtqueue_set_affinity(vi->rq[i].vq, mask); 2803 virtqueue_set_affinity(vi->sq[i].vq, mask); 2804 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 2805 cpumask_clear(mask); 2806 } 2807 2808 vi->affinity_hint_set = true; 2809 free_cpumask_var(mask); 2810 } 2811 2812 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 2813 { 2814 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 2815 node); 2816 virtnet_set_affinity(vi); 2817 return 0; 2818 } 2819 2820 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 2821 { 2822 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 2823 node_dead); 2824 virtnet_set_affinity(vi); 2825 return 0; 2826 } 2827 2828 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 2829 { 2830 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 2831 node); 2832 2833 virtnet_clean_affinity(vi); 2834 return 0; 2835 } 2836 2837 static enum cpuhp_state virtionet_online; 2838 2839 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 2840 { 2841 int ret; 2842 2843 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 2844 if (ret) 2845 return ret; 2846 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 2847 &vi->node_dead); 2848 if (!ret) 2849 return ret; 2850 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 2851 return ret; 2852 } 2853 2854 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 2855 { 2856 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 2857 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 2858 &vi->node_dead); 2859 } 2860 2861 static void virtnet_get_ringparam(struct net_device *dev, 2862 struct ethtool_ringparam *ring, 2863 struct kernel_ethtool_ringparam *kernel_ring, 2864 struct netlink_ext_ack *extack) 2865 { 2866 struct virtnet_info *vi = netdev_priv(dev); 2867 2868 ring->rx_max_pending = vi->rq[0].vq->num_max; 2869 ring->tx_max_pending = vi->sq[0].vq->num_max; 2870 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 2871 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 2872 } 2873 2874 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 2875 u16 vqn, u32 max_usecs, u32 max_packets); 2876 2877 static int virtnet_set_ringparam(struct net_device *dev, 2878 struct ethtool_ringparam *ring, 2879 struct kernel_ethtool_ringparam 
*kernel_ring, 2880 struct netlink_ext_ack *extack) 2881 { 2882 struct virtnet_info *vi = netdev_priv(dev); 2883 u32 rx_pending, tx_pending; 2884 struct receive_queue *rq; 2885 struct send_queue *sq; 2886 int i, err; 2887 2888 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 2889 return -EINVAL; 2890 2891 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 2892 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 2893 2894 if (ring->rx_pending == rx_pending && 2895 ring->tx_pending == tx_pending) 2896 return 0; 2897 2898 if (ring->rx_pending > vi->rq[0].vq->num_max) 2899 return -EINVAL; 2900 2901 if (ring->tx_pending > vi->sq[0].vq->num_max) 2902 return -EINVAL; 2903 2904 for (i = 0; i < vi->max_queue_pairs; i++) { 2905 rq = vi->rq + i; 2906 sq = vi->sq + i; 2907 2908 if (ring->tx_pending != tx_pending) { 2909 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 2910 if (err) 2911 return err; 2912 2913 /* Upon disabling and re-enabling a transmit virtqueue, the device must 2914 * set the coalescing parameters of the virtqueue to those configured 2915 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver 2916 * did not set any TX coalescing parameters, to 0. 2917 */ 2918 err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(i), 2919 vi->intr_coal_tx.max_usecs, 2920 vi->intr_coal_tx.max_packets); 2921 if (err) 2922 return err; 2923 2924 vi->sq[i].intr_coal.max_usecs = vi->intr_coal_tx.max_usecs; 2925 vi->sq[i].intr_coal.max_packets = vi->intr_coal_tx.max_packets; 2926 } 2927 2928 if (ring->rx_pending != rx_pending) { 2929 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 2930 if (err) 2931 return err; 2932 2933 /* The reason is same as the transmit virtqueue reset */ 2934 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(i), 2935 vi->intr_coal_rx.max_usecs, 2936 vi->intr_coal_rx.max_packets); 2937 if (err) 2938 return err; 2939 2940 vi->rq[i].intr_coal.max_usecs = vi->intr_coal_rx.max_usecs; 2941 vi->rq[i].intr_coal.max_packets = vi->intr_coal_rx.max_packets; 2942 } 2943 } 2944 2945 return 0; 2946 } 2947 2948 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 2949 { 2950 struct net_device *dev = vi->dev; 2951 struct scatterlist sgs[4]; 2952 unsigned int sg_buf_size; 2953 2954 /* prepare sgs */ 2955 sg_init_table(sgs, 4); 2956 2957 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table); 2958 sg_set_buf(&sgs[0], &vi->ctrl->rss, sg_buf_size); 2959 2960 sg_buf_size = sizeof(uint16_t) * (vi->ctrl->rss.indirection_table_mask + 1); 2961 sg_set_buf(&sgs[1], vi->ctrl->rss.indirection_table, sg_buf_size); 2962 2963 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) 2964 - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); 2965 sg_set_buf(&sgs[2], &vi->ctrl->rss.max_tx_vq, sg_buf_size); 2966 2967 sg_buf_size = vi->rss_key_size; 2968 sg_set_buf(&sgs[3], vi->ctrl->rss.key, sg_buf_size); 2969 2970 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 2971 vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG 2972 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) { 2973 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 2974 return false; 2975 } 2976 return true; 2977 } 2978 2979 static void virtnet_init_default_rss(struct virtnet_info *vi) 2980 { 2981 u32 indir_val = 0; 2982 int i = 0; 2983 2984 vi->ctrl->rss.hash_types = vi->rss_hash_types_supported; 2985 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 2986 vi->ctrl->rss.indirection_table_mask = vi->rss_indir_table_size 2987 ? 
vi->rss_indir_table_size - 1 : 0; 2988 vi->ctrl->rss.unclassified_queue = 0; 2989 2990 for (; i < vi->rss_indir_table_size; ++i) { 2991 indir_val = ethtool_rxfh_indir_default(i, vi->curr_queue_pairs); 2992 vi->ctrl->rss.indirection_table[i] = indir_val; 2993 } 2994 2995 vi->ctrl->rss.max_tx_vq = vi->has_rss ? vi->curr_queue_pairs : 0; 2996 vi->ctrl->rss.hash_key_length = vi->rss_key_size; 2997 2998 netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size); 2999 } 3000 3001 static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info) 3002 { 3003 info->data = 0; 3004 switch (info->flow_type) { 3005 case TCP_V4_FLOW: 3006 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 3007 info->data = RXH_IP_SRC | RXH_IP_DST | 3008 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3009 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 3010 info->data = RXH_IP_SRC | RXH_IP_DST; 3011 } 3012 break; 3013 case TCP_V6_FLOW: 3014 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 3015 info->data = RXH_IP_SRC | RXH_IP_DST | 3016 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3017 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 3018 info->data = RXH_IP_SRC | RXH_IP_DST; 3019 } 3020 break; 3021 case UDP_V4_FLOW: 3022 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 3023 info->data = RXH_IP_SRC | RXH_IP_DST | 3024 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3025 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 3026 info->data = RXH_IP_SRC | RXH_IP_DST; 3027 } 3028 break; 3029 case UDP_V6_FLOW: 3030 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 3031 info->data = RXH_IP_SRC | RXH_IP_DST | 3032 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3033 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 3034 info->data = RXH_IP_SRC | RXH_IP_DST; 3035 } 3036 break; 3037 case IPV4_FLOW: 3038 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 3039 info->data = RXH_IP_SRC | RXH_IP_DST; 3040 3041 break; 3042 case IPV6_FLOW: 3043 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 3044 info->data = RXH_IP_SRC | RXH_IP_DST; 3045 3046 break; 3047 default: 3048 info->data = 0; 3049 break; 3050 } 3051 } 3052 3053 static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info) 3054 { 3055 u32 new_hashtypes = vi->rss_hash_types_saved; 3056 bool is_disable = info->data & RXH_DISCARD; 3057 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 3058 3059 /* supports only 'sd', 'sdfn' and 'r' */ 3060 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 3061 return false; 3062 3063 switch (info->flow_type) { 3064 case TCP_V4_FLOW: 3065 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 3066 if (!is_disable) 3067 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 3068 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 3069 break; 3070 case UDP_V4_FLOW: 3071 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 3072 if (!is_disable) 3073 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 3074 | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 3075 break; 3076 case IPV4_FLOW: 3077 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 3078 if (!is_disable) 3079 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 3080 break; 3081 case TCP_V6_FLOW: 3082 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 3083 if (!is_disable) 3084 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 3085 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 3086 break; 3087 case UDP_V6_FLOW: 3088 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 3089 if (!is_disable) 3090 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 3091 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 3092 break; 3093 case IPV6_FLOW: 3094 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 3095 if (!is_disable) 3096 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 3097 break; 3098 default: 3099 /* unsupported flow */ 3100 return false; 3101 } 3102 3103 /* if unsupported hashtype was set */ 3104 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 3105 return false; 3106 3107 if (new_hashtypes != vi->rss_hash_types_saved) { 3108 vi->rss_hash_types_saved = new_hashtypes; 3109 vi->ctrl->rss.hash_types = vi->rss_hash_types_saved; 3110 if (vi->dev->features & NETIF_F_RXHASH) 3111 return virtnet_commit_rss_command(vi); 3112 } 3113 3114 return true; 3115 } 3116 3117 static void virtnet_get_drvinfo(struct net_device *dev, 3118 struct ethtool_drvinfo *info) 3119 { 3120 struct virtnet_info *vi = netdev_priv(dev); 3121 struct virtio_device *vdev = vi->vdev; 3122 3123 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 3124 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 3125 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 3126 3127 } 3128 3129 /* TODO: Eliminate OOO packets during switching */ 3130 static int virtnet_set_channels(struct net_device *dev, 3131 struct ethtool_channels *channels) 3132 { 3133 struct virtnet_info *vi = netdev_priv(dev); 3134 u16 queue_pairs = channels->combined_count; 3135 int err; 3136 3137 /* We don't support separate rx/tx channels. 3138 * We don't allow setting 'other' channels. 3139 */ 3140 if (channels->rx_count || channels->tx_count || channels->other_count) 3141 return -EINVAL; 3142 3143 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 3144 return -EINVAL; 3145 3146 /* For now we don't support modifying channels while XDP is loaded 3147 * also when XDP is loaded all RX queues have XDP programs so we only 3148 * need to check a single RX queue. 
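 *
 * From userspace this path is exercised through the standard ethtool
 * channels interface, e.g. (the interface name is only an example):
 *
 *   ethtool -L eth0 combined 4
 *
 * which must request between 1 and max_queue_pairs combined channels and
 * is rejected with -EINVAL while an XDP program is attached.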
3149 */ 3150 if (vi->rq[0].xdp_prog) 3151 return -EINVAL; 3152 3153 cpus_read_lock(); 3154 err = _virtnet_set_queues(vi, queue_pairs); 3155 if (err) { 3156 cpus_read_unlock(); 3157 goto err; 3158 } 3159 virtnet_set_affinity(vi); 3160 cpus_read_unlock(); 3161 3162 netif_set_real_num_tx_queues(dev, queue_pairs); 3163 netif_set_real_num_rx_queues(dev, queue_pairs); 3164 err: 3165 return err; 3166 } 3167 3168 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 3169 { 3170 struct virtnet_info *vi = netdev_priv(dev); 3171 unsigned int i, j; 3172 u8 *p = data; 3173 3174 switch (stringset) { 3175 case ETH_SS_STATS: 3176 for (i = 0; i < vi->curr_queue_pairs; i++) { 3177 for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) 3178 ethtool_sprintf(&p, "rx_queue_%u_%s", i, 3179 virtnet_rq_stats_desc[j].desc); 3180 } 3181 3182 for (i = 0; i < vi->curr_queue_pairs; i++) { 3183 for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) 3184 ethtool_sprintf(&p, "tx_queue_%u_%s", i, 3185 virtnet_sq_stats_desc[j].desc); 3186 } 3187 break; 3188 } 3189 } 3190 3191 static int virtnet_get_sset_count(struct net_device *dev, int sset) 3192 { 3193 struct virtnet_info *vi = netdev_priv(dev); 3194 3195 switch (sset) { 3196 case ETH_SS_STATS: 3197 return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN + 3198 VIRTNET_SQ_STATS_LEN); 3199 default: 3200 return -EOPNOTSUPP; 3201 } 3202 } 3203 3204 static void virtnet_get_ethtool_stats(struct net_device *dev, 3205 struct ethtool_stats *stats, u64 *data) 3206 { 3207 struct virtnet_info *vi = netdev_priv(dev); 3208 unsigned int idx = 0, start, i, j; 3209 const u8 *stats_base; 3210 const u64_stats_t *p; 3211 size_t offset; 3212 3213 for (i = 0; i < vi->curr_queue_pairs; i++) { 3214 struct receive_queue *rq = &vi->rq[i]; 3215 3216 stats_base = (const u8 *)&rq->stats; 3217 do { 3218 start = u64_stats_fetch_begin(&rq->stats.syncp); 3219 for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) { 3220 offset = virtnet_rq_stats_desc[j].offset; 3221 p = (const u64_stats_t *)(stats_base + offset); 3222 data[idx + j] = u64_stats_read(p); 3223 } 3224 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 3225 idx += VIRTNET_RQ_STATS_LEN; 3226 } 3227 3228 for (i = 0; i < vi->curr_queue_pairs; i++) { 3229 struct send_queue *sq = &vi->sq[i]; 3230 3231 stats_base = (const u8 *)&sq->stats; 3232 do { 3233 start = u64_stats_fetch_begin(&sq->stats.syncp); 3234 for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) { 3235 offset = virtnet_sq_stats_desc[j].offset; 3236 p = (const u64_stats_t *)(stats_base + offset); 3237 data[idx + j] = u64_stats_read(p); 3238 } 3239 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 3240 idx += VIRTNET_SQ_STATS_LEN; 3241 } 3242 } 3243 3244 static void virtnet_get_channels(struct net_device *dev, 3245 struct ethtool_channels *channels) 3246 { 3247 struct virtnet_info *vi = netdev_priv(dev); 3248 3249 channels->combined_count = vi->curr_queue_pairs; 3250 channels->max_combined = vi->max_queue_pairs; 3251 channels->max_other = 0; 3252 channels->rx_count = 0; 3253 channels->tx_count = 0; 3254 channels->other_count = 0; 3255 } 3256 3257 static int virtnet_set_link_ksettings(struct net_device *dev, 3258 const struct ethtool_link_ksettings *cmd) 3259 { 3260 struct virtnet_info *vi = netdev_priv(dev); 3261 3262 return ethtool_virtdev_set_link_ksettings(dev, cmd, 3263 &vi->speed, &vi->duplex); 3264 } 3265 3266 static int virtnet_get_link_ksettings(struct net_device *dev, 3267 struct ethtool_link_ksettings *cmd) 3268 { 3269 struct virtnet_info *vi = netdev_priv(dev); 3270 3271 
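/* speed/duplex below are cached by virtnet_update_settings() from the
 * device config space when VIRTIO_NET_F_SPEED_DUPLEX is offered;
 * otherwise they keep the UNKNOWN defaults set in virtnet_init_settings().
 */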
cmd->base.speed = vi->speed; 3272 cmd->base.duplex = vi->duplex; 3273 cmd->base.port = PORT_OTHER; 3274 3275 return 0; 3276 } 3277 3278 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, 3279 struct ethtool_coalesce *ec) 3280 { 3281 struct scatterlist sgs_tx, sgs_rx; 3282 int i; 3283 3284 vi->ctrl->coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 3285 vi->ctrl->coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 3286 sg_init_one(&sgs_tx, &vi->ctrl->coal_tx, sizeof(vi->ctrl->coal_tx)); 3287 3288 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 3289 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 3290 &sgs_tx)) 3291 return -EINVAL; 3292 3293 /* Save parameters */ 3294 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; 3295 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; 3296 for (i = 0; i < vi->max_queue_pairs; i++) { 3297 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; 3298 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; 3299 } 3300 3301 vi->ctrl->coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); 3302 vi->ctrl->coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); 3303 sg_init_one(&sgs_rx, &vi->ctrl->coal_rx, sizeof(vi->ctrl->coal_rx)); 3304 3305 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 3306 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, 3307 &sgs_rx)) 3308 return -EINVAL; 3309 3310 /* Save parameters */ 3311 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; 3312 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; 3313 for (i = 0; i < vi->max_queue_pairs; i++) { 3314 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; 3315 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; 3316 } 3317 3318 return 0; 3319 } 3320 3321 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3322 u16 vqn, u32 max_usecs, u32 max_packets) 3323 { 3324 struct scatterlist sgs; 3325 3326 vi->ctrl->coal_vq.vqn = cpu_to_le16(vqn); 3327 vi->ctrl->coal_vq.coal.max_usecs = cpu_to_le32(max_usecs); 3328 vi->ctrl->coal_vq.coal.max_packets = cpu_to_le32(max_packets); 3329 sg_init_one(&sgs, &vi->ctrl->coal_vq, sizeof(vi->ctrl->coal_vq)); 3330 3331 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 3332 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, 3333 &sgs)) 3334 return -EINVAL; 3335 3336 return 0; 3337 } 3338 3339 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, 3340 struct ethtool_coalesce *ec, 3341 u16 queue) 3342 { 3343 int err; 3344 3345 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), 3346 ec->rx_coalesce_usecs, 3347 ec->rx_max_coalesced_frames); 3348 if (err) 3349 return err; 3350 3351 vi->rq[queue].intr_coal.max_usecs = ec->rx_coalesce_usecs; 3352 vi->rq[queue].intr_coal.max_packets = ec->rx_max_coalesced_frames; 3353 3354 err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), 3355 ec->tx_coalesce_usecs, 3356 ec->tx_max_coalesced_frames); 3357 if (err) 3358 return err; 3359 3360 vi->sq[queue].intr_coal.max_usecs = ec->tx_coalesce_usecs; 3361 vi->sq[queue].intr_coal.max_packets = ec->tx_max_coalesced_frames; 3362 3363 return 0; 3364 } 3365 3366 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 3367 { 3368 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 3369 * feature is negotiated. 
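 *
 * Without that feature the only accepted values are rx-frames == 1 and
 * tx-frames 0 or 1, where tx-frames merely toggles TX NAPI, e.g. (the
 * interface name is only illustrative):
 *
 *   ethtool -C eth0 rx-frames 1 tx-frames 1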
3370 */ 3371 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 3372 return -EOPNOTSUPP; 3373 3374 if (ec->tx_max_coalesced_frames > 1 || 3375 ec->rx_max_coalesced_frames != 1) 3376 return -EINVAL; 3377 3378 return 0; 3379 } 3380 3381 static int virtnet_should_update_vq_weight(int dev_flags, int weight, 3382 int vq_weight, bool *should_update) 3383 { 3384 if (weight ^ vq_weight) { 3385 if (dev_flags & IFF_UP) 3386 return -EBUSY; 3387 *should_update = true; 3388 } 3389 3390 return 0; 3391 } 3392 3393 static int virtnet_set_coalesce(struct net_device *dev, 3394 struct ethtool_coalesce *ec, 3395 struct kernel_ethtool_coalesce *kernel_coal, 3396 struct netlink_ext_ack *extack) 3397 { 3398 struct virtnet_info *vi = netdev_priv(dev); 3399 int ret, queue_number, napi_weight; 3400 bool update_napi = false; 3401 3402 /* Can't change NAPI weight if the link is up */ 3403 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 3404 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { 3405 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 3406 vi->sq[queue_number].napi.weight, 3407 &update_napi); 3408 if (ret) 3409 return ret; 3410 3411 if (update_napi) { 3412 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be 3413 * updated for the sake of simplicity, which might not be necessary 3414 */ 3415 break; 3416 } 3417 } 3418 3419 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 3420 ret = virtnet_send_notf_coal_cmds(vi, ec); 3421 else 3422 ret = virtnet_coal_params_supported(ec); 3423 3424 if (ret) 3425 return ret; 3426 3427 if (update_napi) { 3428 for (; queue_number < vi->max_queue_pairs; queue_number++) 3429 vi->sq[queue_number].napi.weight = napi_weight; 3430 } 3431 3432 return ret; 3433 } 3434 3435 static int virtnet_get_coalesce(struct net_device *dev, 3436 struct ethtool_coalesce *ec, 3437 struct kernel_ethtool_coalesce *kernel_coal, 3438 struct netlink_ext_ack *extack) 3439 { 3440 struct virtnet_info *vi = netdev_priv(dev); 3441 3442 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 3443 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; 3444 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; 3445 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; 3446 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; 3447 } else { 3448 ec->rx_max_coalesced_frames = 1; 3449 3450 if (vi->sq[0].napi.weight) 3451 ec->tx_max_coalesced_frames = 1; 3452 } 3453 3454 return 0; 3455 } 3456 3457 static int virtnet_set_per_queue_coalesce(struct net_device *dev, 3458 u32 queue, 3459 struct ethtool_coalesce *ec) 3460 { 3461 struct virtnet_info *vi = netdev_priv(dev); 3462 int ret, napi_weight; 3463 bool update_napi = false; 3464 3465 if (queue >= vi->max_queue_pairs) 3466 return -EINVAL; 3467 3468 /* Can't change NAPI weight if the link is up */ 3469 napi_weight = ec->tx_max_coalesced_frames ? 
NAPI_POLL_WEIGHT : 0; 3470 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 3471 vi->sq[queue].napi.weight, 3472 &update_napi); 3473 if (ret) 3474 return ret; 3475 3476 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3477 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); 3478 else 3479 ret = virtnet_coal_params_supported(ec); 3480 3481 if (ret) 3482 return ret; 3483 3484 if (update_napi) 3485 vi->sq[queue].napi.weight = napi_weight; 3486 3487 return 0; 3488 } 3489 3490 static int virtnet_get_per_queue_coalesce(struct net_device *dev, 3491 u32 queue, 3492 struct ethtool_coalesce *ec) 3493 { 3494 struct virtnet_info *vi = netdev_priv(dev); 3495 3496 if (queue >= vi->max_queue_pairs) 3497 return -EINVAL; 3498 3499 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 3500 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; 3501 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; 3502 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; 3503 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; 3504 } else { 3505 ec->rx_max_coalesced_frames = 1; 3506 3507 if (vi->sq[queue].napi.weight) 3508 ec->tx_max_coalesced_frames = 1; 3509 } 3510 3511 return 0; 3512 } 3513 3514 static void virtnet_init_settings(struct net_device *dev) 3515 { 3516 struct virtnet_info *vi = netdev_priv(dev); 3517 3518 vi->speed = SPEED_UNKNOWN; 3519 vi->duplex = DUPLEX_UNKNOWN; 3520 } 3521 3522 static void virtnet_update_settings(struct virtnet_info *vi) 3523 { 3524 u32 speed; 3525 u8 duplex; 3526 3527 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 3528 return; 3529 3530 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 3531 3532 if (ethtool_validate_speed(speed)) 3533 vi->speed = speed; 3534 3535 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); 3536 3537 if (ethtool_validate_duplex(duplex)) 3538 vi->duplex = duplex; 3539 } 3540 3541 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 3542 { 3543 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 3544 } 3545 3546 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 3547 { 3548 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 3549 } 3550 3551 static int virtnet_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfunc) 3552 { 3553 struct virtnet_info *vi = netdev_priv(dev); 3554 int i; 3555 3556 if (indir) { 3557 for (i = 0; i < vi->rss_indir_table_size; ++i) 3558 indir[i] = vi->ctrl->rss.indirection_table[i]; 3559 } 3560 3561 if (key) 3562 memcpy(key, vi->ctrl->rss.key, vi->rss_key_size); 3563 3564 if (hfunc) 3565 *hfunc = ETH_RSS_HASH_TOP; 3566 3567 return 0; 3568 } 3569 3570 static int virtnet_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) 3571 { 3572 struct virtnet_info *vi = netdev_priv(dev); 3573 int i; 3574 3575 if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) 3576 return -EOPNOTSUPP; 3577 3578 if (indir) { 3579 for (i = 0; i < vi->rss_indir_table_size; ++i) 3580 vi->ctrl->rss.indirection_table[i] = indir[i]; 3581 } 3582 if (key) 3583 memcpy(vi->ctrl->rss.key, key, vi->rss_key_size); 3584 3585 virtnet_commit_rss_command(vi); 3586 3587 return 0; 3588 } 3589 3590 static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) 3591 { 3592 struct virtnet_info *vi = netdev_priv(dev); 3593 int rc = 0; 3594 3595 switch (info->cmd) { 3596 case ETHTOOL_GRXRINGS: 3597 info->data = vi->curr_queue_pairs; 
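/* ETHTOOL_GRXRINGS reports how many RX rings flow hashing may spread
 * over; for virtio-net that is simply the current number of queue pairs.
 */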
3598 break; 3599 case ETHTOOL_GRXFH: 3600 virtnet_get_hashflow(vi, info); 3601 break; 3602 default: 3603 rc = -EOPNOTSUPP; 3604 } 3605 3606 return rc; 3607 } 3608 3609 static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) 3610 { 3611 struct virtnet_info *vi = netdev_priv(dev); 3612 int rc = 0; 3613 3614 switch (info->cmd) { 3615 case ETHTOOL_SRXFH: 3616 if (!virtnet_set_hashflow(vi, info)) 3617 rc = -EINVAL; 3618 3619 break; 3620 default: 3621 rc = -EOPNOTSUPP; 3622 } 3623 3624 return rc; 3625 } 3626 3627 static const struct ethtool_ops virtnet_ethtool_ops = { 3628 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 3629 ETHTOOL_COALESCE_USECS, 3630 .get_drvinfo = virtnet_get_drvinfo, 3631 .get_link = ethtool_op_get_link, 3632 .get_ringparam = virtnet_get_ringparam, 3633 .set_ringparam = virtnet_set_ringparam, 3634 .get_strings = virtnet_get_strings, 3635 .get_sset_count = virtnet_get_sset_count, 3636 .get_ethtool_stats = virtnet_get_ethtool_stats, 3637 .set_channels = virtnet_set_channels, 3638 .get_channels = virtnet_get_channels, 3639 .get_ts_info = ethtool_op_get_ts_info, 3640 .get_link_ksettings = virtnet_get_link_ksettings, 3641 .set_link_ksettings = virtnet_set_link_ksettings, 3642 .set_coalesce = virtnet_set_coalesce, 3643 .get_coalesce = virtnet_get_coalesce, 3644 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, 3645 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, 3646 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 3647 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 3648 .get_rxfh = virtnet_get_rxfh, 3649 .set_rxfh = virtnet_set_rxfh, 3650 .get_rxnfc = virtnet_get_rxnfc, 3651 .set_rxnfc = virtnet_set_rxnfc, 3652 }; 3653 3654 static void virtnet_freeze_down(struct virtio_device *vdev) 3655 { 3656 struct virtnet_info *vi = vdev->priv; 3657 3658 /* Make sure no work handler is accessing the device */ 3659 flush_work(&vi->config_work); 3660 3661 netif_tx_lock_bh(vi->dev); 3662 netif_device_detach(vi->dev); 3663 netif_tx_unlock_bh(vi->dev); 3664 if (netif_running(vi->dev)) 3665 virtnet_close(vi->dev); 3666 } 3667 3668 static int init_vqs(struct virtnet_info *vi); 3669 3670 static int virtnet_restore_up(struct virtio_device *vdev) 3671 { 3672 struct virtnet_info *vi = vdev->priv; 3673 int err; 3674 3675 err = init_vqs(vi); 3676 if (err) 3677 return err; 3678 3679 virtio_device_ready(vdev); 3680 3681 enable_delayed_refill(vi); 3682 3683 if (netif_running(vi->dev)) { 3684 err = virtnet_open(vi->dev); 3685 if (err) 3686 return err; 3687 } 3688 3689 netif_tx_lock_bh(vi->dev); 3690 netif_device_attach(vi->dev); 3691 netif_tx_unlock_bh(vi->dev); 3692 return err; 3693 } 3694 3695 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) 3696 { 3697 struct scatterlist sg; 3698 vi->ctrl->offloads = cpu_to_virtio64(vi->vdev, offloads); 3699 3700 sg_init_one(&sg, &vi->ctrl->offloads, sizeof(vi->ctrl->offloads)); 3701 3702 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 3703 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 3704 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); 3705 return -EINVAL; 3706 } 3707 3708 return 0; 3709 } 3710 3711 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 3712 { 3713 u64 offloads = 0; 3714 3715 if (!vi->guest_offloads) 3716 return 0; 3717 3718 return virtnet_set_guest_offloads(vi, offloads); 3719 } 3720 3721 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 3722 { 3723 u64 offloads = vi->guest_offloads; 3724 3725 if (!vi->guest_offloads) 
3726 return 0; 3727 3728 return virtnet_set_guest_offloads(vi, offloads); 3729 } 3730 3731 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 3732 struct netlink_ext_ack *extack) 3733 { 3734 unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + 3735 sizeof(struct skb_shared_info)); 3736 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 3737 struct virtnet_info *vi = netdev_priv(dev); 3738 struct bpf_prog *old_prog; 3739 u16 xdp_qp = 0, curr_qp; 3740 int i, err; 3741 3742 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 3743 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 3744 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 3745 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 3746 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 3747 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 3748 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 3749 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 3750 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); 3751 return -EOPNOTSUPP; 3752 } 3753 3754 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 3755 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); 3756 return -EINVAL; 3757 } 3758 3759 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 3760 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 3761 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 3762 return -EINVAL; 3763 } 3764 3765 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 3766 if (prog) 3767 xdp_qp = nr_cpu_ids; 3768 3769 /* XDP requires extra queues for XDP_TX */ 3770 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 3771 netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 3772 curr_qp + xdp_qp, vi->max_queue_pairs); 3773 xdp_qp = 0; 3774 } 3775 3776 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 3777 if (!prog && !old_prog) 3778 return 0; 3779 3780 if (prog) 3781 bpf_prog_add(prog, vi->max_queue_pairs - 1); 3782 3783 /* Make sure NAPI is not using any XDP TX queues for RX. 
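 *
 * (All RX and TX NAPIs are disabled below before rq->xdp_prog is switched
 * and the queue count is adjusted, so no poller can observe a half
 * installed program; they are re-enabled once the new program is in place
 * on every queue.)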
*/ 3784 if (netif_running(dev)) { 3785 for (i = 0; i < vi->max_queue_pairs; i++) { 3786 napi_disable(&vi->rq[i].napi); 3787 virtnet_napi_tx_disable(&vi->sq[i].napi); 3788 } 3789 } 3790 3791 if (!prog) { 3792 for (i = 0; i < vi->max_queue_pairs; i++) { 3793 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 3794 if (i == 0) 3795 virtnet_restore_guest_offloads(vi); 3796 } 3797 synchronize_net(); 3798 } 3799 3800 err = _virtnet_set_queues(vi, curr_qp + xdp_qp); 3801 if (err) 3802 goto err; 3803 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 3804 vi->xdp_queue_pairs = xdp_qp; 3805 3806 if (prog) { 3807 vi->xdp_enabled = true; 3808 for (i = 0; i < vi->max_queue_pairs; i++) { 3809 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 3810 if (i == 0 && !old_prog) 3811 virtnet_clear_guest_offloads(vi); 3812 } 3813 if (!old_prog) 3814 xdp_features_set_redirect_target(dev, true); 3815 } else { 3816 xdp_features_clear_redirect_target(dev); 3817 vi->xdp_enabled = false; 3818 } 3819 3820 for (i = 0; i < vi->max_queue_pairs; i++) { 3821 if (old_prog) 3822 bpf_prog_put(old_prog); 3823 if (netif_running(dev)) { 3824 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 3825 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 3826 &vi->sq[i].napi); 3827 } 3828 } 3829 3830 return 0; 3831 3832 err: 3833 if (!prog) { 3834 virtnet_clear_guest_offloads(vi); 3835 for (i = 0; i < vi->max_queue_pairs; i++) 3836 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 3837 } 3838 3839 if (netif_running(dev)) { 3840 for (i = 0; i < vi->max_queue_pairs; i++) { 3841 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 3842 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 3843 &vi->sq[i].napi); 3844 } 3845 } 3846 if (prog) 3847 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 3848 return err; 3849 } 3850 3851 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 3852 { 3853 switch (xdp->command) { 3854 case XDP_SETUP_PROG: 3855 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 3856 default: 3857 return -EINVAL; 3858 } 3859 } 3860 3861 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 3862 size_t len) 3863 { 3864 struct virtnet_info *vi = netdev_priv(dev); 3865 int ret; 3866 3867 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3868 return -EOPNOTSUPP; 3869 3870 ret = snprintf(buf, len, "sby"); 3871 if (ret >= len) 3872 return -EOPNOTSUPP; 3873 3874 return 0; 3875 } 3876 3877 static int virtnet_set_features(struct net_device *dev, 3878 netdev_features_t features) 3879 { 3880 struct virtnet_info *vi = netdev_priv(dev); 3881 u64 offloads; 3882 int err; 3883 3884 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 3885 if (vi->xdp_enabled) 3886 return -EBUSY; 3887 3888 if (features & NETIF_F_GRO_HW) 3889 offloads = vi->guest_offloads_capable; 3890 else 3891 offloads = vi->guest_offloads_capable & 3892 ~GUEST_OFFLOAD_GRO_HW_MASK; 3893 3894 err = virtnet_set_guest_offloads(vi, offloads); 3895 if (err) 3896 return err; 3897 vi->guest_offloads = offloads; 3898 } 3899 3900 if ((dev->features ^ features) & NETIF_F_RXHASH) { 3901 if (features & NETIF_F_RXHASH) 3902 vi->ctrl->rss.hash_types = vi->rss_hash_types_saved; 3903 else 3904 vi->ctrl->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; 3905 3906 if (!virtnet_commit_rss_command(vi)) 3907 return -EINVAL; 3908 } 3909 3910 return 0; 3911 } 3912 3913 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 3914 { 3915 struct virtnet_info *priv = netdev_priv(dev); 3916 struct send_queue *sq = &priv->sq[txqueue]; 3917 struct netdev_queue *txq = 
netdev_get_tx_queue(dev, txqueue); 3918 3919 u64_stats_update_begin(&sq->stats.syncp); 3920 u64_stats_inc(&sq->stats.tx_timeouts); 3921 u64_stats_update_end(&sq->stats.syncp); 3922 3923 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 3924 txqueue, sq->name, sq->vq->index, sq->vq->name, 3925 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 3926 } 3927 3928 static const struct net_device_ops virtnet_netdev = { 3929 .ndo_open = virtnet_open, 3930 .ndo_stop = virtnet_close, 3931 .ndo_start_xmit = start_xmit, 3932 .ndo_validate_addr = eth_validate_addr, 3933 .ndo_set_mac_address = virtnet_set_mac_address, 3934 .ndo_set_rx_mode = virtnet_set_rx_mode, 3935 .ndo_get_stats64 = virtnet_stats, 3936 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 3937 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 3938 .ndo_bpf = virtnet_xdp, 3939 .ndo_xdp_xmit = virtnet_xdp_xmit, 3940 .ndo_features_check = passthru_features_check, 3941 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 3942 .ndo_set_features = virtnet_set_features, 3943 .ndo_tx_timeout = virtnet_tx_timeout, 3944 }; 3945 3946 static void virtnet_config_changed_work(struct work_struct *work) 3947 { 3948 struct virtnet_info *vi = 3949 container_of(work, struct virtnet_info, config_work); 3950 u16 v; 3951 3952 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 3953 struct virtio_net_config, status, &v) < 0) 3954 return; 3955 3956 if (v & VIRTIO_NET_S_ANNOUNCE) { 3957 netdev_notify_peers(vi->dev); 3958 virtnet_ack_link_announce(vi); 3959 } 3960 3961 /* Ignore unknown (future) status bits */ 3962 v &= VIRTIO_NET_S_LINK_UP; 3963 3964 if (vi->status == v) 3965 return; 3966 3967 vi->status = v; 3968 3969 if (vi->status & VIRTIO_NET_S_LINK_UP) { 3970 virtnet_update_settings(vi); 3971 netif_carrier_on(vi->dev); 3972 netif_tx_wake_all_queues(vi->dev); 3973 } else { 3974 netif_carrier_off(vi->dev); 3975 netif_tx_stop_all_queues(vi->dev); 3976 } 3977 } 3978 3979 static void virtnet_config_changed(struct virtio_device *vdev) 3980 { 3981 struct virtnet_info *vi = vdev->priv; 3982 3983 schedule_work(&vi->config_work); 3984 } 3985 3986 static void virtnet_free_queues(struct virtnet_info *vi) 3987 { 3988 int i; 3989 3990 for (i = 0; i < vi->max_queue_pairs; i++) { 3991 __netif_napi_del(&vi->rq[i].napi); 3992 __netif_napi_del(&vi->sq[i].napi); 3993 } 3994 3995 /* We called __netif_napi_del(), 3996 * we need to respect an RCU grace period before freeing vi->rq 3997 */ 3998 synchronize_net(); 3999 4000 kfree(vi->rq); 4001 kfree(vi->sq); 4002 kfree(vi->ctrl); 4003 } 4004 4005 static void _free_receive_bufs(struct virtnet_info *vi) 4006 { 4007 struct bpf_prog *old_prog; 4008 int i; 4009 4010 for (i = 0; i < vi->max_queue_pairs; i++) { 4011 while (vi->rq[i].pages) 4012 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 4013 4014 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 4015 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 4016 if (old_prog) 4017 bpf_prog_put(old_prog); 4018 } 4019 } 4020 4021 static void free_receive_bufs(struct virtnet_info *vi) 4022 { 4023 rtnl_lock(); 4024 _free_receive_bufs(vi); 4025 rtnl_unlock(); 4026 } 4027 4028 static void free_receive_page_frags(struct virtnet_info *vi) 4029 { 4030 int i; 4031 for (i = 0; i < vi->max_queue_pairs; i++) 4032 if (vi->rq[i].alloc_frag.page) { 4033 if (vi->rq[i].do_dma && vi->rq[i].last_dma) 4034 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); 4035 put_page(vi->rq[i].alloc_frag.page); 4036 } 4037 } 4038 4039 static void virtnet_sq_free_unused_buf(struct 
virtqueue *vq, void *buf) 4040 { 4041 if (!is_xdp_frame(buf)) 4042 dev_kfree_skb(buf); 4043 else 4044 xdp_return_frame(ptr_to_xdp(buf)); 4045 } 4046 4047 static void free_unused_bufs(struct virtnet_info *vi) 4048 { 4049 void *buf; 4050 int i; 4051 4052 for (i = 0; i < vi->max_queue_pairs; i++) { 4053 struct virtqueue *vq = vi->sq[i].vq; 4054 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 4055 virtnet_sq_free_unused_buf(vq, buf); 4056 cond_resched(); 4057 } 4058 4059 for (i = 0; i < vi->max_queue_pairs; i++) { 4060 struct virtqueue *vq = vi->rq[i].vq; 4061 4062 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 4063 virtnet_rq_unmap_free_buf(vq, buf); 4064 cond_resched(); 4065 } 4066 } 4067 4068 static void virtnet_del_vqs(struct virtnet_info *vi) 4069 { 4070 struct virtio_device *vdev = vi->vdev; 4071 4072 virtnet_clean_affinity(vi); 4073 4074 vdev->config->del_vqs(vdev); 4075 4076 virtnet_free_queues(vi); 4077 } 4078 4079 /* How large should a single buffer be so a queue full of these can fit at 4080 * least one full packet? 4081 * Logic below assumes the mergeable buffer header is used. 4082 */ 4083 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 4084 { 4085 const unsigned int hdr_len = vi->hdr_len; 4086 unsigned int rq_size = virtqueue_get_vring_size(vq); 4087 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 4088 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 4089 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 4090 4091 return max(max(min_buf_len, hdr_len) - hdr_len, 4092 (unsigned int)GOOD_PACKET_LEN); 4093 } 4094 4095 static int virtnet_find_vqs(struct virtnet_info *vi) 4096 { 4097 vq_callback_t **callbacks; 4098 struct virtqueue **vqs; 4099 const char **names; 4100 int ret = -ENOMEM; 4101 int total_vqs; 4102 bool *ctx; 4103 u16 i; 4104 4105 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 4106 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 4107 * possible control vq. 
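* For example, with two queue pairs and a control virtqueue the
* expected order is rx0, tx0, rx1, tx1, ctrl, which is the layout the
* rxq2vq()/txq2vq() index helpers used below assume.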
4108 */ 4109 total_vqs = vi->max_queue_pairs * 2 + 4110 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 4111 4112 /* Allocate space for find_vqs parameters */ 4113 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 4114 if (!vqs) 4115 goto err_vq; 4116 callbacks = kmalloc_array(total_vqs, sizeof(*callbacks), GFP_KERNEL); 4117 if (!callbacks) 4118 goto err_callback; 4119 names = kmalloc_array(total_vqs, sizeof(*names), GFP_KERNEL); 4120 if (!names) 4121 goto err_names; 4122 if (!vi->big_packets || vi->mergeable_rx_bufs) { 4123 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 4124 if (!ctx) 4125 goto err_ctx; 4126 } else { 4127 ctx = NULL; 4128 } 4129 4130 /* Parameters for control virtqueue, if any */ 4131 if (vi->has_cvq) { 4132 callbacks[total_vqs - 1] = NULL; 4133 names[total_vqs - 1] = "control"; 4134 } 4135 4136 /* Allocate/initialize parameters for send/receive virtqueues */ 4137 for (i = 0; i < vi->max_queue_pairs; i++) { 4138 callbacks[rxq2vq(i)] = skb_recv_done; 4139 callbacks[txq2vq(i)] = skb_xmit_done; 4140 sprintf(vi->rq[i].name, "input.%u", i); 4141 sprintf(vi->sq[i].name, "output.%u", i); 4142 names[rxq2vq(i)] = vi->rq[i].name; 4143 names[txq2vq(i)] = vi->sq[i].name; 4144 if (ctx) 4145 ctx[rxq2vq(i)] = true; 4146 } 4147 4148 ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks, 4149 names, ctx, NULL); 4150 if (ret) 4151 goto err_find; 4152 4153 if (vi->has_cvq) { 4154 vi->cvq = vqs[total_vqs - 1]; 4155 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 4156 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 4157 } 4158 4159 for (i = 0; i < vi->max_queue_pairs; i++) { 4160 vi->rq[i].vq = vqs[rxq2vq(i)]; 4161 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 4162 vi->sq[i].vq = vqs[txq2vq(i)]; 4163 } 4164 4165 /* run here: ret == 0. */ 4166 4167 4168 err_find: 4169 kfree(ctx); 4170 err_ctx: 4171 kfree(names); 4172 err_names: 4173 kfree(callbacks); 4174 err_callback: 4175 kfree(vqs); 4176 err_vq: 4177 return ret; 4178 } 4179 4180 static int virtnet_alloc_queues(struct virtnet_info *vi) 4181 { 4182 int i; 4183 4184 if (vi->has_cvq) { 4185 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 4186 if (!vi->ctrl) 4187 goto err_ctrl; 4188 } else { 4189 vi->ctrl = NULL; 4190 } 4191 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 4192 if (!vi->sq) 4193 goto err_sq; 4194 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 4195 if (!vi->rq) 4196 goto err_rq; 4197 4198 INIT_DELAYED_WORK(&vi->refill, refill_work); 4199 for (i = 0; i < vi->max_queue_pairs; i++) { 4200 vi->rq[i].pages = NULL; 4201 netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll, 4202 napi_weight); 4203 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 4204 virtnet_poll_tx, 4205 napi_tx ? 
napi_weight : 0); 4206 4207 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 4208 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 4209 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 4210 4211 u64_stats_init(&vi->rq[i].stats.syncp); 4212 u64_stats_init(&vi->sq[i].stats.syncp); 4213 } 4214 4215 return 0; 4216 4217 err_rq: 4218 kfree(vi->sq); 4219 err_sq: 4220 kfree(vi->ctrl); 4221 err_ctrl: 4222 return -ENOMEM; 4223 } 4224 4225 static int init_vqs(struct virtnet_info *vi) 4226 { 4227 int ret; 4228 4229 /* Allocate send & receive queues */ 4230 ret = virtnet_alloc_queues(vi); 4231 if (ret) 4232 goto err; 4233 4234 ret = virtnet_find_vqs(vi); 4235 if (ret) 4236 goto err_free; 4237 4238 virtnet_rq_set_premapped(vi); 4239 4240 cpus_read_lock(); 4241 virtnet_set_affinity(vi); 4242 cpus_read_unlock(); 4243 4244 return 0; 4245 4246 err_free: 4247 virtnet_free_queues(vi); 4248 err: 4249 return ret; 4250 } 4251 4252 #ifdef CONFIG_SYSFS 4253 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 4254 char *buf) 4255 { 4256 struct virtnet_info *vi = netdev_priv(queue->dev); 4257 unsigned int queue_index = get_netdev_rx_queue_index(queue); 4258 unsigned int headroom = virtnet_get_headroom(vi); 4259 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 4260 struct ewma_pkt_len *avg; 4261 4262 BUG_ON(queue_index >= vi->max_queue_pairs); 4263 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 4264 return sprintf(buf, "%u\n", 4265 get_mergeable_buf_len(&vi->rq[queue_index], avg, 4266 SKB_DATA_ALIGN(headroom + tailroom))); 4267 } 4268 4269 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 4270 __ATTR_RO(mergeable_rx_buffer_size); 4271 4272 static struct attribute *virtio_net_mrg_rx_attrs[] = { 4273 &mergeable_rx_buffer_size_attribute.attr, 4274 NULL 4275 }; 4276 4277 static const struct attribute_group virtio_net_mrg_rx_group = { 4278 .name = "virtio_net", 4279 .attrs = virtio_net_mrg_rx_attrs 4280 }; 4281 #endif 4282 4283 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 4284 unsigned int fbit, 4285 const char *fname, const char *dname) 4286 { 4287 if (!virtio_has_feature(vdev, fbit)) 4288 return false; 4289 4290 dev_err(&vdev->dev, "device advertises feature %s but not %s", 4291 fname, dname); 4292 4293 return true; 4294 } 4295 4296 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 4297 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 4298 4299 static bool virtnet_validate_features(struct virtio_device *vdev) 4300 { 4301 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 4302 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 4303 "VIRTIO_NET_F_CTRL_VQ") || 4304 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 4305 "VIRTIO_NET_F_CTRL_VQ") || 4306 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 4307 "VIRTIO_NET_F_CTRL_VQ") || 4308 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 4309 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 4310 "VIRTIO_NET_F_CTRL_VQ") || 4311 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 4312 "VIRTIO_NET_F_CTRL_VQ") || 4313 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 4314 "VIRTIO_NET_F_CTRL_VQ") || 4315 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 4316 "VIRTIO_NET_F_CTRL_VQ") || 4317 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, 4318 "VIRTIO_NET_F_CTRL_VQ"))) { 4319 return false; 4320 } 4321 4322 return true; 4323 } 4324 4325 #define MIN_MTU ETH_MIN_MTU 4326 #define MAX_MTU ETH_MAX_MTU 4327 4328 static int virtnet_validate(struct virtio_device *vdev) 4329 { 4330 if (!vdev->config->get) { 4331 
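/* Without config space access we cannot read the MAC address, MTU,
* status or multiqueue parameters, so refuse to bind rather than guess
* at a configuration.
*/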
dev_err(&vdev->dev, "%s failure: config access disabled\n", 4332 __func__); 4333 return -EINVAL; 4334 } 4335 4336 if (!virtnet_validate_features(vdev)) 4337 return -EINVAL; 4338 4339 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 4340 int mtu = virtio_cread16(vdev, 4341 offsetof(struct virtio_net_config, 4342 mtu)); 4343 if (mtu < MIN_MTU) 4344 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 4345 } 4346 4347 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && 4348 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 4349 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); 4350 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); 4351 } 4352 4353 return 0; 4354 } 4355 4356 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 4357 { 4358 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 4359 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 4360 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 4361 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 4362 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 4363 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 4364 } 4365 4366 static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) 4367 { 4368 bool guest_gso = virtnet_check_guest_gso(vi); 4369 4370 /* If the device can receive ANY guest GSO packets, regardless of mtu, 4371 * allocate packets of maximum size; otherwise limit allocations to 4372 * an mtu's worth of data. 4373 */ 4374 if (mtu > ETH_DATA_LEN || guest_gso) { 4375 vi->big_packets = true; 4376 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); 4377 } 4378 } 4379 4380 static int virtnet_probe(struct virtio_device *vdev) 4381 { 4382 int i, err = -ENOMEM; 4383 struct net_device *dev; 4384 struct virtnet_info *vi; 4385 u16 max_queue_pairs; 4386 int mtu = 0; 4387 4388 /* Find out whether the host supports a multiqueue/rss virtio_net device */ 4389 max_queue_pairs = 1; 4390 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 4391 max_queue_pairs = 4392 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); 4393 4394 /* We need at least 2 queues */ 4395 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 4396 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 4397 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 4398 max_queue_pairs = 1; 4399 4400 /* Allocate ourselves a network device with room for our info */ 4401 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); 4402 if (!dev) 4403 return -ENOMEM; 4404 4405 /* Set up network device as normal. */ 4406 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | 4407 IFF_TX_SKB_NO_LINEAR; 4408 dev->netdev_ops = &virtnet_netdev; 4409 dev->features = NETIF_F_HIGHDMA; 4410 4411 dev->ethtool_ops = &virtnet_ethtool_ops; 4412 SET_NETDEV_DEV(dev, &vdev->dev); 4413 4414 /* Do we support "hardware" checksums? */ 4415 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 4416 /* This opens up the world of extra features. */ 4417 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 4418 if (csum) 4419 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 4420 4421 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 4422 dev->hw_features |= NETIF_F_TSO 4423 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 4424 } 4425 /* Individual feature bits: what can host handle?
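* Each VIRTIO_NET_F_HOST_* bit offered by the device maps to the
* matching NETIF_F_* offload in hw_features; register_netdev() later
* drops combinations that cannot work (e.g. TSO without checksum
* offload, see the (!csum && gso) note below).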
*/ 4426 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 4427 dev->hw_features |= NETIF_F_TSO; 4428 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 4429 dev->hw_features |= NETIF_F_TSO6; 4430 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 4431 dev->hw_features |= NETIF_F_TSO_ECN; 4432 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) 4433 dev->hw_features |= NETIF_F_GSO_UDP_L4; 4434 4435 dev->features |= NETIF_F_GSO_ROBUST; 4436 4437 if (gso) 4438 dev->features |= dev->hw_features & NETIF_F_ALL_TSO; 4439 /* (!csum && gso) case will be fixed by register_netdev() */ 4440 } 4441 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM)) 4442 dev->features |= NETIF_F_RXCSUM; 4443 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 4444 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 4445 dev->features |= NETIF_F_GRO_HW; 4446 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 4447 dev->hw_features |= NETIF_F_GRO_HW; 4448 4449 dev->vlan_features = dev->features; 4450 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; 4451 4452 /* MTU range: 68 - 65535 */ 4453 dev->min_mtu = MIN_MTU; 4454 dev->max_mtu = MAX_MTU; 4455 4456 /* Configuration may specify what MAC to use. Otherwise random. */ 4457 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 4458 u8 addr[ETH_ALEN]; 4459 4460 virtio_cread_bytes(vdev, 4461 offsetof(struct virtio_net_config, mac), 4462 addr, ETH_ALEN); 4463 eth_hw_addr_set(dev, addr); 4464 } else { 4465 eth_hw_addr_random(dev); 4466 dev_info(&vdev->dev, "Assigned random MAC address %pM\n", 4467 dev->dev_addr); 4468 } 4469 4470 /* Set up our device-specific information */ 4471 vi = netdev_priv(dev); 4472 vi->dev = dev; 4473 vi->vdev = vdev; 4474 vdev->priv = vi; 4475 4476 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 4477 spin_lock_init(&vi->refill_lock); 4478 4479 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { 4480 vi->mergeable_rx_bufs = true; 4481 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; 4482 } 4483 4484 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 4485 vi->intr_coal_rx.max_usecs = 0; 4486 vi->intr_coal_tx.max_usecs = 0; 4487 vi->intr_coal_tx.max_packets = 0; 4488 vi->intr_coal_rx.max_packets = 0; 4489 } 4490 4491 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) 4492 vi->has_rss_hash_report = true; 4493 4494 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 4495 vi->has_rss = true; 4496 4497 if (vi->has_rss || vi->has_rss_hash_report) { 4498 vi->rss_indir_table_size = 4499 virtio_cread16(vdev, offsetof(struct virtio_net_config, 4500 rss_max_indirection_table_length)); 4501 vi->rss_key_size = 4502 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); 4503 4504 vi->rss_hash_types_supported = 4505 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); 4506 vi->rss_hash_types_supported &= 4507 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | 4508 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 4509 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); 4510 4511 dev->hw_features |= NETIF_F_RXHASH; 4512 } 4513 4514 if (vi->has_rss_hash_report) 4515 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); 4516 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 4517 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 4518 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 4519 else 4520 vi->hdr_len = sizeof(struct virtio_net_hdr); 4521 4522 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 4523 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 4524 vi->any_header_sg = true; 4525 4526 if 
(virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 4527 vi->has_cvq = true; 4528 4529 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 4530 mtu = virtio_cread16(vdev, 4531 offsetof(struct virtio_net_config, 4532 mtu)); 4533 if (mtu < dev->min_mtu) { 4534 /* Should never trigger: MTU was previously validated 4535 * in virtnet_validate. 4536 */ 4537 dev_err(&vdev->dev, 4538 "device MTU appears to have changed, it is now %d < %d", 4539 mtu, dev->min_mtu); 4540 err = -EINVAL; 4541 goto free; 4542 } 4543 4544 dev->mtu = mtu; 4545 dev->max_mtu = mtu; 4546 } 4547 4548 virtnet_set_big_packets(vi, mtu); 4549 4550 if (vi->any_header_sg) 4551 dev->needed_headroom = vi->hdr_len; 4552 4553 /* Enable multiqueue by default */ 4554 if (num_online_cpus() >= max_queue_pairs) 4555 vi->curr_queue_pairs = max_queue_pairs; 4556 else 4557 vi->curr_queue_pairs = num_online_cpus(); 4558 vi->max_queue_pairs = max_queue_pairs; 4559 4560 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ 4561 err = init_vqs(vi); 4562 if (err) 4563 goto free; 4564 4565 #ifdef CONFIG_SYSFS 4566 if (vi->mergeable_rx_bufs) 4567 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; 4568 #endif 4569 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); 4570 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); 4571 4572 virtnet_init_settings(dev); 4573 4574 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 4575 vi->failover = net_failover_create(vi->dev); 4576 if (IS_ERR(vi->failover)) { 4577 err = PTR_ERR(vi->failover); 4578 goto free_vqs; 4579 } 4580 } 4581 4582 if (vi->has_rss || vi->has_rss_hash_report) 4583 virtnet_init_default_rss(vi); 4584 4585 /* serialize netdev register + virtio_device_ready() with ndo_open() */ 4586 rtnl_lock(); 4587 4588 err = register_netdevice(dev); 4589 if (err) { 4590 pr_debug("virtio_net: registering device failed\n"); 4591 rtnl_unlock(); 4592 goto free_failover; 4593 } 4594 4595 virtio_device_ready(vdev); 4596 4597 _virtnet_set_queues(vi, vi->curr_queue_pairs); 4598 4599 /* A random MAC address has been assigned, notify the device. 4600 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there 4601 * because many devices work fine without the MAC being set explicitly 4602 */ 4603 if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 4604 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 4605 struct scatterlist sg; 4606 4607 sg_init_one(&sg, dev->dev_addr, dev->addr_len); 4608 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 4609 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 4610 pr_debug("virtio_net: setting MAC address failed\n"); 4611 rtnl_unlock(); 4612 err = -EINVAL; 4613 goto free_unregister_netdev; 4614 } 4615 } 4616 4617 rtnl_unlock(); 4618 4619 err = virtnet_cpu_notif_add(vi); 4620 if (err) { 4621 pr_debug("virtio_net: registering cpu notifier failed\n"); 4622 goto free_unregister_netdev; 4623 } 4624 4625 /* Assume link up if the device can't report link status, 4626 * otherwise get the link status from config.
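* The carrier starts off here; if VIRTIO_NET_F_STATUS was negotiated
* the config-change worker reads VIRTIO_NET_S_LINK_UP and toggles the
* carrier accordingly, otherwise the link is marked up immediately.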
*/ 4627 netif_carrier_off(dev); 4628 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 4629 schedule_work(&vi->config_work); 4630 } else { 4631 vi->status = VIRTIO_NET_S_LINK_UP; 4632 virtnet_update_settings(vi); 4633 netif_carrier_on(dev); 4634 } 4635 4636 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) 4637 if (virtio_has_feature(vi->vdev, guest_offloads[i])) 4638 set_bit(guest_offloads[i], &vi->guest_offloads); 4639 vi->guest_offloads_capable = vi->guest_offloads; 4640 4641 pr_debug("virtnet: registered device %s with %d RX and TX vqs\n", 4642 dev->name, max_queue_pairs); 4643 4644 return 0; 4645 4646 free_unregister_netdev: 4647 unregister_netdev(dev); 4648 free_failover: 4649 net_failover_destroy(vi->failover); 4650 free_vqs: 4651 virtio_reset_device(vdev); 4652 cancel_delayed_work_sync(&vi->refill); 4653 free_receive_page_frags(vi); 4654 virtnet_del_vqs(vi); 4655 free: 4656 free_netdev(dev); 4657 return err; 4658 } 4659 4660 static void remove_vq_common(struct virtnet_info *vi) 4661 { 4662 virtio_reset_device(vi->vdev); 4663 4664 /* Free unused buffers in both send and recv, if any. */ 4665 free_unused_bufs(vi); 4666 4667 free_receive_bufs(vi); 4668 4669 free_receive_page_frags(vi); 4670 4671 virtnet_del_vqs(vi); 4672 } 4673 4674 static void virtnet_remove(struct virtio_device *vdev) 4675 { 4676 struct virtnet_info *vi = vdev->priv; 4677 4678 virtnet_cpu_notif_remove(vi); 4679 4680 /* Make sure no work handler is accessing the device. */ 4681 flush_work(&vi->config_work); 4682 4683 unregister_netdev(vi->dev); 4684 4685 net_failover_destroy(vi->failover); 4686 4687 remove_vq_common(vi); 4688 4689 free_netdev(vi->dev); 4690 } 4691 4692 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) 4693 { 4694 struct virtnet_info *vi = vdev->priv; 4695 4696 virtnet_cpu_notif_remove(vi); 4697 virtnet_freeze_down(vdev); 4698 remove_vq_common(vi); 4699 4700 return 0; 4701 } 4702 4703 static __maybe_unused int virtnet_restore(struct virtio_device *vdev) 4704 { 4705 struct virtnet_info *vi = vdev->priv; 4706 int err; 4707 4708 err = virtnet_restore_up(vdev); 4709 if (err) 4710 return err; 4711 virtnet_set_queues(vi, vi->curr_queue_pairs); 4712 4713 err = virtnet_cpu_notif_add(vi); 4714 if (err) { 4715 virtnet_freeze_down(vdev); 4716 remove_vq_common(vi); 4717 return err; 4718 } 4719 4720 return 0; 4721 } 4722 4723 static struct virtio_device_id id_table[] = { 4724 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 4725 { 0 }, 4726 }; 4727 4728 #define VIRTNET_FEATURES \ 4729 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ 4730 VIRTIO_NET_F_MAC, \ 4731 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ 4732 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ 4733 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ 4734 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ 4735 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ 4736 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ 4737 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ 4738 VIRTIO_NET_F_CTRL_MAC_ADDR, \ 4739 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ 4740 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ 4741 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ 4742 VIRTIO_NET_F_VQ_NOTF_COAL, \ 4743 VIRTIO_NET_F_GUEST_HDRLEN 4744 4745 static unsigned int features[] = { 4746 VIRTNET_FEATURES, 4747 }; 4748 4749 static unsigned int features_legacy[] = { 4750 VIRTNET_FEATURES, 4751 VIRTIO_NET_F_GSO, 4752 VIRTIO_F_ANY_LAYOUT,
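/* The combined VIRTIO_NET_F_GSO bit and VIRTIO_F_ANY_LAYOUT are only
* offered to legacy (pre virtio 1.0) devices via this table; modern
* devices negotiate the per-protocol VIRTIO_NET_F_HOST_* bits and get
* ANY_LAYOUT semantics implied by VIRTIO_F_VERSION_1.
*/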
4753 }; 4754 4755 static struct virtio_driver virtio_net_driver = { 4756 .feature_table = features, 4757 .feature_table_size = ARRAY_SIZE(features), 4758 .feature_table_legacy = features_legacy, 4759 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 4760 .driver.name = KBUILD_MODNAME, 4761 .driver.owner = THIS_MODULE, 4762 .id_table = id_table, 4763 .validate = virtnet_validate, 4764 .probe = virtnet_probe, 4765 .remove = virtnet_remove, 4766 .config_changed = virtnet_config_changed, 4767 #ifdef CONFIG_PM_SLEEP 4768 .freeze = virtnet_freeze, 4769 .restore = virtnet_restore, 4770 #endif 4771 }; 4772 4773 static __init int virtio_net_driver_init(void) 4774 { 4775 int ret; 4776 4777 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", 4778 virtnet_cpu_online, 4779 virtnet_cpu_down_prep); 4780 if (ret < 0) 4781 goto out; 4782 virtionet_online = ret; 4783 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", 4784 NULL, virtnet_cpu_dead); 4785 if (ret) 4786 goto err_dead; 4787 ret = register_virtio_driver(&virtio_net_driver); 4788 if (ret) 4789 goto err_virtio; 4790 return 0; 4791 err_virtio: 4792 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 4793 err_dead: 4794 cpuhp_remove_multi_state(virtionet_online); 4795 out: 4796 return ret; 4797 } 4798 module_init(virtio_net_driver_init); 4799 4800 static __exit void virtio_net_driver_exit(void) 4801 { 4802 unregister_virtio_driver(&virtio_net_driver); 4803 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 4804 cpuhp_remove_multi_state(virtionet_online); 4805 } 4806 module_exit(virtio_net_driver_exit); 4807 4808 MODULE_DEVICE_TABLE(virtio, id_table); 4809 MODULE_DESCRIPTION("Virtio network driver"); 4810 MODULE_LICENSE("GPL"); 4811