1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* A network driver using virtio. 3 * 4 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation 5 */ 6 //#define DEBUG 7 #include <linux/netdevice.h> 8 #include <linux/etherdevice.h> 9 #include <linux/ethtool.h> 10 #include <linux/module.h> 11 #include <linux/virtio.h> 12 #include <linux/virtio_net.h> 13 #include <linux/bpf.h> 14 #include <linux/bpf_trace.h> 15 #include <linux/scatterlist.h> 16 #include <linux/if_vlan.h> 17 #include <linux/slab.h> 18 #include <linux/cpu.h> 19 #include <linux/average.h> 20 #include <linux/filter.h> 21 #include <linux/kernel.h> 22 #include <net/route.h> 23 #include <net/xdp.h> 24 #include <net/net_failover.h> 25 #include <net/netdev_rx_queue.h> 26 27 static int napi_weight = NAPI_POLL_WEIGHT; 28 module_param(napi_weight, int, 0444); 29 30 static bool csum = true, gso = true, napi_tx = true; 31 module_param(csum, bool, 0444); 32 module_param(gso, bool, 0444); 33 module_param(napi_tx, bool, 0644); 34 35 /* FIXME: MTU in config. */ 36 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) 37 #define GOOD_COPY_LEN 128 38 39 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) 40 41 /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */ 42 #define VIRTIO_XDP_HEADROOM 256 43 44 /* Separating two types of XDP xmit */ 45 #define VIRTIO_XDP_TX BIT(0) 46 #define VIRTIO_XDP_REDIR BIT(1) 47 48 #define VIRTIO_XDP_FLAG BIT(0) 49 50 /* RX packet size EWMA. The average packet size is used to determine the packet 51 * buffer size when refilling RX rings. As the entire RX ring may be refilled 52 * at once, the weight is chosen so that the EWMA will be insensitive to short- 53 * term, transient changes in packet size. 54 */ 55 DECLARE_EWMA(pkt_len, 0, 64) 56 57 #define VIRTNET_DRIVER_VERSION "1.0.0" 58 59 static const unsigned long guest_offloads[] = { 60 VIRTIO_NET_F_GUEST_TSO4, 61 VIRTIO_NET_F_GUEST_TSO6, 62 VIRTIO_NET_F_GUEST_ECN, 63 VIRTIO_NET_F_GUEST_UFO, 64 VIRTIO_NET_F_GUEST_CSUM, 65 VIRTIO_NET_F_GUEST_USO4, 66 VIRTIO_NET_F_GUEST_USO6, 67 VIRTIO_NET_F_GUEST_HDRLEN 68 }; 69 70 #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 71 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 72 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 73 (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ 74 (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ 75 (1ULL << VIRTIO_NET_F_GUEST_USO6)) 76 77 struct virtnet_stat_desc { 78 char desc[ETH_GSTRING_LEN]; 79 size_t offset; 80 }; 81 82 struct virtnet_sq_stats { 83 struct u64_stats_sync syncp; 84 u64_stats_t packets; 85 u64_stats_t bytes; 86 u64_stats_t xdp_tx; 87 u64_stats_t xdp_tx_drops; 88 u64_stats_t kicks; 89 u64_stats_t tx_timeouts; 90 }; 91 92 struct virtnet_rq_stats { 93 struct u64_stats_sync syncp; 94 u64_stats_t packets; 95 u64_stats_t bytes; 96 u64_stats_t drops; 97 u64_stats_t xdp_packets; 98 u64_stats_t xdp_tx; 99 u64_stats_t xdp_redirects; 100 u64_stats_t xdp_drops; 101 u64_stats_t kicks; 102 }; 103 104 #define VIRTNET_SQ_STAT(m) offsetof(struct virtnet_sq_stats, m) 105 #define VIRTNET_RQ_STAT(m) offsetof(struct virtnet_rq_stats, m) 106 107 static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { 108 { "packets", VIRTNET_SQ_STAT(packets) }, 109 { "bytes", VIRTNET_SQ_STAT(bytes) }, 110 { "xdp_tx", VIRTNET_SQ_STAT(xdp_tx) }, 111 { "xdp_tx_drops", VIRTNET_SQ_STAT(xdp_tx_drops) }, 112 { "kicks", VIRTNET_SQ_STAT(kicks) }, 113 { "tx_timeouts", VIRTNET_SQ_STAT(tx_timeouts) }, 114 }; 115 116 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { 117 { 
"packets", VIRTNET_RQ_STAT(packets) }, 118 { "bytes", VIRTNET_RQ_STAT(bytes) }, 119 { "drops", VIRTNET_RQ_STAT(drops) }, 120 { "xdp_packets", VIRTNET_RQ_STAT(xdp_packets) }, 121 { "xdp_tx", VIRTNET_RQ_STAT(xdp_tx) }, 122 { "xdp_redirects", VIRTNET_RQ_STAT(xdp_redirects) }, 123 { "xdp_drops", VIRTNET_RQ_STAT(xdp_drops) }, 124 { "kicks", VIRTNET_RQ_STAT(kicks) }, 125 }; 126 127 #define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc) 128 #define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc) 129 130 struct virtnet_interrupt_coalesce { 131 u32 max_packets; 132 u32 max_usecs; 133 }; 134 135 /* The dma information of pages allocated at a time. */ 136 struct virtnet_rq_dma { 137 dma_addr_t addr; 138 u32 ref; 139 u16 len; 140 u16 need_sync; 141 }; 142 143 /* Internal representation of a send virtqueue */ 144 struct send_queue { 145 /* Virtqueue associated with this send _queue */ 146 struct virtqueue *vq; 147 148 /* TX: fragments + linear part + virtio header */ 149 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 150 151 /* Name of the send queue: output.$index */ 152 char name[16]; 153 154 struct virtnet_sq_stats stats; 155 156 struct virtnet_interrupt_coalesce intr_coal; 157 158 struct napi_struct napi; 159 160 /* Record whether sq is in reset state. */ 161 bool reset; 162 }; 163 164 /* Internal representation of a receive virtqueue */ 165 struct receive_queue { 166 /* Virtqueue associated with this receive_queue */ 167 struct virtqueue *vq; 168 169 struct napi_struct napi; 170 171 struct bpf_prog __rcu *xdp_prog; 172 173 struct virtnet_rq_stats stats; 174 175 struct virtnet_interrupt_coalesce intr_coal; 176 177 /* Chain pages by the private ptr. */ 178 struct page *pages; 179 180 /* Average packet length for mergeable receive buffers. */ 181 struct ewma_pkt_len mrg_avg_pkt_len; 182 183 /* Page frag for packet buffer allocation. */ 184 struct page_frag alloc_frag; 185 186 /* RX: fragments + linear part + virtio header */ 187 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 188 189 /* Min single buffer size for mergeable buffers case. */ 190 unsigned int min_buf_len; 191 192 /* Name of this receive queue: input.$index */ 193 char name[16]; 194 195 struct xdp_rxq_info xdp_rxq; 196 197 /* Record the last dma info to free after new pages is allocated. */ 198 struct virtnet_rq_dma *last_dma; 199 200 /* Do dma by self */ 201 bool do_dma; 202 }; 203 204 /* This structure can contain rss message with maximum settings for indirection table and keysize 205 * Note, that default structure that describes RSS configuration virtio_net_rss_config 206 * contains same info but can't handle table values. 207 * In any case, structure would be passed to virtio hw through sg_buf split by parts 208 * because table sizes may be differ according to the device configuration. 
209 */ 210 #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 211 #define VIRTIO_NET_RSS_MAX_TABLE_LEN 128 212 struct virtio_net_ctrl_rss { 213 u32 hash_types; 214 u16 indirection_table_mask; 215 u16 unclassified_queue; 216 u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN]; 217 u16 max_tx_vq; 218 u8 hash_key_length; 219 u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; 220 }; 221 222 /* Control VQ buffers: protected by the rtnl lock */ 223 struct control_buf { 224 struct virtio_net_ctrl_hdr hdr; 225 virtio_net_ctrl_ack status; 226 struct virtio_net_ctrl_mq mq; 227 u8 promisc; 228 u8 allmulti; 229 __virtio16 vid; 230 __virtio64 offloads; 231 struct virtio_net_ctrl_rss rss; 232 struct virtio_net_ctrl_coal_tx coal_tx; 233 struct virtio_net_ctrl_coal_rx coal_rx; 234 struct virtio_net_ctrl_coal_vq coal_vq; 235 }; 236 237 struct virtnet_info { 238 struct virtio_device *vdev; 239 struct virtqueue *cvq; 240 struct net_device *dev; 241 struct send_queue *sq; 242 struct receive_queue *rq; 243 unsigned int status; 244 245 /* Max # of queue pairs supported by the device */ 246 u16 max_queue_pairs; 247 248 /* # of queue pairs currently used by the driver */ 249 u16 curr_queue_pairs; 250 251 /* # of XDP queue pairs currently used by the driver */ 252 u16 xdp_queue_pairs; 253 254 /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */ 255 bool xdp_enabled; 256 257 /* I like... big packets and I cannot lie! */ 258 bool big_packets; 259 260 /* number of sg entries allocated for big packets */ 261 unsigned int big_packets_num_skbfrags; 262 263 /* Host will merge rx buffers for big packets (shake it! shake it!) */ 264 bool mergeable_rx_bufs; 265 266 /* Host supports rss and/or hash report */ 267 bool has_rss; 268 bool has_rss_hash_report; 269 u8 rss_key_size; 270 u16 rss_indir_table_size; 271 u32 rss_hash_types_supported; 272 u32 rss_hash_types_saved; 273 274 /* Has control virtqueue */ 275 bool has_cvq; 276 277 /* Host can handle any s/g split between our header and packet data */ 278 bool any_header_sg; 279 280 /* Packet virtio header size */ 281 u8 hdr_len; 282 283 /* Work struct for delayed refilling if we run low on memory. */ 284 struct delayed_work refill; 285 286 /* Is delayed refill enabled? */ 287 bool refill_enabled; 288 289 /* The lock to synchronize the access to refill_enabled */ 290 spinlock_t refill_lock; 291 292 /* Work struct for config space updates */ 293 struct work_struct config_work; 294 295 /* Does the affinity hint is set for virtqueues? */ 296 bool affinity_hint_set; 297 298 /* CPU hotplug instances for online & dead */ 299 struct hlist_node node; 300 struct hlist_node node_dead; 301 302 struct control_buf *ctrl; 303 304 /* Ethtool settings */ 305 u8 duplex; 306 u32 speed; 307 308 /* Interrupt coalescing settings */ 309 struct virtnet_interrupt_coalesce intr_coal_tx; 310 struct virtnet_interrupt_coalesce intr_coal_rx; 311 312 unsigned long guest_offloads; 313 unsigned long guest_offloads_capable; 314 315 /* failover when STANDBY feature enabled */ 316 struct failover *failover; 317 }; 318 319 struct padded_vnet_hdr { 320 struct virtio_net_hdr_v1_hash hdr; 321 /* 322 * hdr is in a separate sg buffer, and data sg buffer shares same page 323 * with this header sg. This padding makes next sg 16 byte aligned 324 * after the header. 
325 */ 326 char padding[12]; 327 }; 328 329 struct virtio_net_common_hdr { 330 union { 331 struct virtio_net_hdr hdr; 332 struct virtio_net_hdr_mrg_rxbuf mrg_hdr; 333 struct virtio_net_hdr_v1_hash hash_v1_hdr; 334 }; 335 }; 336 337 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); 338 339 static bool is_xdp_frame(void *ptr) 340 { 341 return (unsigned long)ptr & VIRTIO_XDP_FLAG; 342 } 343 344 static void *xdp_to_ptr(struct xdp_frame *ptr) 345 { 346 return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); 347 } 348 349 static struct xdp_frame *ptr_to_xdp(void *ptr) 350 { 351 return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); 352 } 353 354 /* Converting between virtqueue no. and kernel tx/rx queue no. 355 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq 356 */ 357 static int vq2txq(struct virtqueue *vq) 358 { 359 return (vq->index - 1) / 2; 360 } 361 362 static int txq2vq(int txq) 363 { 364 return txq * 2 + 1; 365 } 366 367 static int vq2rxq(struct virtqueue *vq) 368 { 369 return vq->index / 2; 370 } 371 372 static int rxq2vq(int rxq) 373 { 374 return rxq * 2; 375 } 376 377 static inline struct virtio_net_common_hdr * 378 skb_vnet_common_hdr(struct sk_buff *skb) 379 { 380 return (struct virtio_net_common_hdr *)skb->cb; 381 } 382 383 /* 384 * private is used to chain pages for big packets, put the whole 385 * most recent used list in the beginning for reuse 386 */ 387 static void give_pages(struct receive_queue *rq, struct page *page) 388 { 389 struct page *end; 390 391 /* Find end of list, sew whole thing into vi->rq.pages. */ 392 for (end = page; end->private; end = (struct page *)end->private); 393 end->private = (unsigned long)rq->pages; 394 rq->pages = page; 395 } 396 397 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) 398 { 399 struct page *p = rq->pages; 400 401 if (p) { 402 rq->pages = (struct page *)p->private; 403 /* clear private here, it is used to chain pages */ 404 p->private = 0; 405 } else 406 p = alloc_page(gfp_mask); 407 return p; 408 } 409 410 static void virtnet_rq_free_buf(struct virtnet_info *vi, 411 struct receive_queue *rq, void *buf) 412 { 413 if (vi->mergeable_rx_bufs) 414 put_page(virt_to_head_page(buf)); 415 else if (vi->big_packets) 416 give_pages(rq, buf); 417 else 418 put_page(virt_to_head_page(buf)); 419 } 420 421 static void enable_delayed_refill(struct virtnet_info *vi) 422 { 423 spin_lock_bh(&vi->refill_lock); 424 vi->refill_enabled = true; 425 spin_unlock_bh(&vi->refill_lock); 426 } 427 428 static void disable_delayed_refill(struct virtnet_info *vi) 429 { 430 spin_lock_bh(&vi->refill_lock); 431 vi->refill_enabled = false; 432 spin_unlock_bh(&vi->refill_lock); 433 } 434 435 static void virtqueue_napi_schedule(struct napi_struct *napi, 436 struct virtqueue *vq) 437 { 438 if (napi_schedule_prep(napi)) { 439 virtqueue_disable_cb(vq); 440 __napi_schedule(napi); 441 } 442 } 443 444 static void virtqueue_napi_complete(struct napi_struct *napi, 445 struct virtqueue *vq, int processed) 446 { 447 int opaque; 448 449 opaque = virtqueue_enable_cb_prepare(vq); 450 if (napi_complete_done(napi, processed)) { 451 if (unlikely(virtqueue_poll(vq, opaque))) 452 virtqueue_napi_schedule(napi, vq); 453 } else { 454 virtqueue_disable_cb(vq); 455 } 456 } 457 458 static void skb_xmit_done(struct virtqueue *vq) 459 { 460 struct virtnet_info *vi = vq->vdev->priv; 461 struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi; 462 463 /* Suppress further interrupts. 
*/ 464 virtqueue_disable_cb(vq); 465 466 if (napi->weight) 467 virtqueue_napi_schedule(napi, vq); 468 else 469 /* We were probably waiting for more output buffers. */ 470 netif_wake_subqueue(vi->dev, vq2txq(vq)); 471 } 472 473 #define MRG_CTX_HEADER_SHIFT 22 474 static void *mergeable_len_to_ctx(unsigned int truesize, 475 unsigned int headroom) 476 { 477 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); 478 } 479 480 static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) 481 { 482 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; 483 } 484 485 static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) 486 { 487 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); 488 } 489 490 static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, 491 unsigned int headroom, 492 unsigned int len) 493 { 494 struct sk_buff *skb; 495 496 skb = build_skb(buf, buflen); 497 if (unlikely(!skb)) 498 return NULL; 499 500 skb_reserve(skb, headroom); 501 skb_put(skb, len); 502 503 return skb; 504 } 505 506 /* Called from bottom half context */ 507 static struct sk_buff *page_to_skb(struct virtnet_info *vi, 508 struct receive_queue *rq, 509 struct page *page, unsigned int offset, 510 unsigned int len, unsigned int truesize, 511 unsigned int headroom) 512 { 513 struct sk_buff *skb; 514 struct virtio_net_common_hdr *hdr; 515 unsigned int copy, hdr_len, hdr_padded_len; 516 struct page *page_to_free = NULL; 517 int tailroom, shinfo_size; 518 char *p, *hdr_p, *buf; 519 520 p = page_address(page) + offset; 521 hdr_p = p; 522 523 hdr_len = vi->hdr_len; 524 if (vi->mergeable_rx_bufs) 525 hdr_padded_len = hdr_len; 526 else 527 hdr_padded_len = sizeof(struct padded_vnet_hdr); 528 529 buf = p - headroom; 530 len -= hdr_len; 531 offset += hdr_padded_len; 532 p += hdr_padded_len; 533 tailroom = truesize - headroom - hdr_padded_len - len; 534 535 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 536 537 /* copy small packet so we can reuse these pages */ 538 if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) { 539 skb = virtnet_build_skb(buf, truesize, p - buf, len); 540 if (unlikely(!skb)) 541 return NULL; 542 543 page = (struct page *)page->private; 544 if (page) 545 give_pages(rq, page); 546 goto ok; 547 } 548 549 /* copy small packet so we can reuse these pages for small data */ 550 skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN); 551 if (unlikely(!skb)) 552 return NULL; 553 554 /* Copy all frame if it fits skb->head, otherwise 555 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. 556 */ 557 if (len <= skb_tailroom(skb)) 558 copy = len; 559 else 560 copy = ETH_HLEN; 561 skb_put_data(skb, p, copy); 562 563 len -= copy; 564 offset += copy; 565 566 if (vi->mergeable_rx_bufs) { 567 if (len) 568 skb_add_rx_frag(skb, 0, page, offset, len, truesize); 569 else 570 page_to_free = page; 571 goto ok; 572 } 573 574 /* 575 * Verify that we can indeed put this data into a skb. 576 * This is here to handle cases when the device erroneously 577 * tries to receive more than is possible. This is usually 578 * the case of a broken device. 
579 */ 580 if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { 581 net_dbg_ratelimited("%s: too much data\n", skb->dev->name); 582 dev_kfree_skb(skb); 583 return NULL; 584 } 585 BUG_ON(offset >= PAGE_SIZE); 586 while (len) { 587 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); 588 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, 589 frag_size, truesize); 590 len -= frag_size; 591 page = (struct page *)page->private; 592 offset = 0; 593 } 594 595 if (page) 596 give_pages(rq, page); 597 598 ok: 599 hdr = skb_vnet_common_hdr(skb); 600 memcpy(hdr, hdr_p, hdr_len); 601 if (page_to_free) 602 put_page(page_to_free); 603 604 return skb; 605 } 606 607 static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) 608 { 609 struct page *page = virt_to_head_page(buf); 610 struct virtnet_rq_dma *dma; 611 void *head; 612 int offset; 613 614 head = page_address(page); 615 616 dma = head; 617 618 --dma->ref; 619 620 if (dma->need_sync && len) { 621 offset = buf - (head + sizeof(*dma)); 622 623 virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, 624 offset, len, 625 DMA_FROM_DEVICE); 626 } 627 628 if (dma->ref) 629 return; 630 631 virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len, 632 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); 633 put_page(page); 634 } 635 636 static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) 637 { 638 void *buf; 639 640 buf = virtqueue_get_buf_ctx(rq->vq, len, ctx); 641 if (buf && rq->do_dma) 642 virtnet_rq_unmap(rq, buf, *len); 643 644 return buf; 645 } 646 647 static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) 648 { 649 struct virtnet_rq_dma *dma; 650 dma_addr_t addr; 651 u32 offset; 652 void *head; 653 654 if (!rq->do_dma) { 655 sg_init_one(rq->sg, buf, len); 656 return; 657 } 658 659 head = page_address(rq->alloc_frag.page); 660 661 offset = buf - head; 662 663 dma = head; 664 665 addr = dma->addr - sizeof(*dma) + offset; 666 667 sg_init_table(rq->sg, 1); 668 rq->sg[0].dma_address = addr; 669 rq->sg[0].length = len; 670 } 671 672 static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) 673 { 674 struct page_frag *alloc_frag = &rq->alloc_frag; 675 struct virtnet_rq_dma *dma; 676 void *buf, *head; 677 dma_addr_t addr; 678 679 if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp))) 680 return NULL; 681 682 head = page_address(alloc_frag->page); 683 684 if (rq->do_dma) { 685 dma = head; 686 687 /* new pages */ 688 if (!alloc_frag->offset) { 689 if (rq->last_dma) { 690 /* Now, the new page is allocated, the last dma 691 * will not be used. So the dma can be unmapped 692 * if the ref is 0. 693 */ 694 virtnet_rq_unmap(rq, rq->last_dma, 0); 695 rq->last_dma = NULL; 696 } 697 698 dma->len = alloc_frag->size - sizeof(*dma); 699 700 addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1, 701 dma->len, DMA_FROM_DEVICE, 0); 702 if (virtqueue_dma_mapping_error(rq->vq, addr)) 703 return NULL; 704 705 dma->addr = addr; 706 dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr); 707 708 /* Add a reference to dma to prevent the entire dma from 709 * being released during error handling. This reference 710 * will be freed after the pages are no longer used. 
711 */ 712 get_page(alloc_frag->page); 713 dma->ref = 1; 714 alloc_frag->offset = sizeof(*dma); 715 716 rq->last_dma = dma; 717 } 718 719 ++dma->ref; 720 } 721 722 buf = head + alloc_frag->offset; 723 724 get_page(alloc_frag->page); 725 alloc_frag->offset += size; 726 727 return buf; 728 } 729 730 static void virtnet_rq_set_premapped(struct virtnet_info *vi) 731 { 732 int i; 733 734 /* disable for big mode */ 735 if (!vi->mergeable_rx_bufs && vi->big_packets) 736 return; 737 738 for (i = 0; i < vi->max_queue_pairs; i++) { 739 if (virtqueue_set_dma_premapped(vi->rq[i].vq)) 740 continue; 741 742 vi->rq[i].do_dma = true; 743 } 744 } 745 746 static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf) 747 { 748 struct virtnet_info *vi = vq->vdev->priv; 749 struct receive_queue *rq; 750 int i = vq2rxq(vq); 751 752 rq = &vi->rq[i]; 753 754 if (rq->do_dma) 755 virtnet_rq_unmap(rq, buf, 0); 756 757 virtnet_rq_free_buf(vi, rq, buf); 758 } 759 760 static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) 761 { 762 unsigned int len; 763 unsigned int packets = 0; 764 unsigned int bytes = 0; 765 void *ptr; 766 767 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { 768 if (likely(!is_xdp_frame(ptr))) { 769 struct sk_buff *skb = ptr; 770 771 pr_debug("Sent skb %p\n", skb); 772 773 bytes += skb->len; 774 napi_consume_skb(skb, in_napi); 775 } else { 776 struct xdp_frame *frame = ptr_to_xdp(ptr); 777 778 bytes += xdp_get_frame_len(frame); 779 xdp_return_frame(frame); 780 } 781 packets++; 782 } 783 784 /* Avoid overhead when no packets have been processed 785 * happens when called speculatively from start_xmit. 786 */ 787 if (!packets) 788 return; 789 790 u64_stats_update_begin(&sq->stats.syncp); 791 u64_stats_add(&sq->stats.bytes, bytes); 792 u64_stats_add(&sq->stats.packets, packets); 793 u64_stats_update_end(&sq->stats.syncp); 794 } 795 796 static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) 797 { 798 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) 799 return false; 800 else if (q < vi->curr_queue_pairs) 801 return true; 802 else 803 return false; 804 } 805 806 static void check_sq_full_and_disable(struct virtnet_info *vi, 807 struct net_device *dev, 808 struct send_queue *sq) 809 { 810 bool use_napi = sq->napi.weight; 811 int qnum; 812 813 qnum = sq - vi->sq; 814 815 /* If running out of space, stop queue to avoid getting packets that we 816 * are then unable to transmit. 817 * An alternative would be to force queuing layer to requeue the skb by 818 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be 819 * returned in a normal path of operation: it means that driver is not 820 * maintaining the TX queue stop/start state properly, and causes 821 * the stack to do a non-trivial amount of useless work. 822 * Since most packets only take 1 or 2 ring slots, stopping the queue 823 * early means 16 slots are typically wasted. 824 */ 825 if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { 826 netif_stop_subqueue(dev, qnum); 827 if (use_napi) { 828 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 829 virtqueue_napi_schedule(&sq->napi, sq->vq); 830 } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { 831 /* More just got used, free them then recheck. 
*/ 832 free_old_xmit_skbs(sq, false); 833 if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { 834 netif_start_subqueue(dev, qnum); 835 virtqueue_disable_cb(sq->vq); 836 } 837 } 838 } 839 } 840 841 static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, 842 struct send_queue *sq, 843 struct xdp_frame *xdpf) 844 { 845 struct virtio_net_hdr_mrg_rxbuf *hdr; 846 struct skb_shared_info *shinfo; 847 u8 nr_frags = 0; 848 int err, i; 849 850 if (unlikely(xdpf->headroom < vi->hdr_len)) 851 return -EOVERFLOW; 852 853 if (unlikely(xdp_frame_has_frags(xdpf))) { 854 shinfo = xdp_get_shared_info_from_frame(xdpf); 855 nr_frags = shinfo->nr_frags; 856 } 857 858 /* In wrapping function virtnet_xdp_xmit(), we need to free 859 * up the pending old buffers, where we need to calculate the 860 * position of skb_shared_info in xdp_get_frame_len() and 861 * xdp_return_frame(), which will involve to xdpf->data and 862 * xdpf->headroom. Therefore, we need to update the value of 863 * headroom synchronously here. 864 */ 865 xdpf->headroom -= vi->hdr_len; 866 xdpf->data -= vi->hdr_len; 867 /* Zero header and leave csum up to XDP layers */ 868 hdr = xdpf->data; 869 memset(hdr, 0, vi->hdr_len); 870 xdpf->len += vi->hdr_len; 871 872 sg_init_table(sq->sg, nr_frags + 1); 873 sg_set_buf(sq->sg, xdpf->data, xdpf->len); 874 for (i = 0; i < nr_frags; i++) { 875 skb_frag_t *frag = &shinfo->frags[i]; 876 877 sg_set_page(&sq->sg[i + 1], skb_frag_page(frag), 878 skb_frag_size(frag), skb_frag_off(frag)); 879 } 880 881 err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1, 882 xdp_to_ptr(xdpf), GFP_ATOMIC); 883 if (unlikely(err)) 884 return -ENOSPC; /* Caller handle free/refcnt */ 885 886 return 0; 887 } 888 889 /* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on 890 * the current cpu, so it does not need to be locked. 891 * 892 * Here we use marco instead of inline functions because we have to deal with 893 * three issues at the same time: 1. the choice of sq. 2. judge and execute the 894 * lock/unlock of txq 3. make sparse happy. It is difficult for two inline 895 * functions to perfectly solve these three problems at the same time. 896 */ 897 #define virtnet_xdp_get_sq(vi) ({ \ 898 int cpu = smp_processor_id(); \ 899 struct netdev_queue *txq; \ 900 typeof(vi) v = (vi); \ 901 unsigned int qp; \ 902 \ 903 if (v->curr_queue_pairs > nr_cpu_ids) { \ 904 qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ 905 qp += cpu; \ 906 txq = netdev_get_tx_queue(v->dev, qp); \ 907 __netif_tx_acquire(txq); \ 908 } else { \ 909 qp = cpu % v->curr_queue_pairs; \ 910 txq = netdev_get_tx_queue(v->dev, qp); \ 911 __netif_tx_lock(txq, cpu); \ 912 } \ 913 v->sq + qp; \ 914 }) 915 916 #define virtnet_xdp_put_sq(vi, q) { \ 917 struct netdev_queue *txq; \ 918 typeof(vi) v = (vi); \ 919 \ 920 txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ 921 if (v->curr_queue_pairs > nr_cpu_ids) \ 922 __netif_tx_release(txq); \ 923 else \ 924 __netif_tx_unlock(txq); \ 925 } 926 927 static int virtnet_xdp_xmit(struct net_device *dev, 928 int n, struct xdp_frame **frames, u32 flags) 929 { 930 struct virtnet_info *vi = netdev_priv(dev); 931 struct receive_queue *rq = vi->rq; 932 struct bpf_prog *xdp_prog; 933 struct send_queue *sq; 934 unsigned int len; 935 int packets = 0; 936 int bytes = 0; 937 int nxmit = 0; 938 int kicks = 0; 939 void *ptr; 940 int ret; 941 int i; 942 943 /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this 944 * indicate XDP resources have been successfully allocated. 
945 */ 946 xdp_prog = rcu_access_pointer(rq->xdp_prog); 947 if (!xdp_prog) 948 return -ENXIO; 949 950 sq = virtnet_xdp_get_sq(vi); 951 952 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { 953 ret = -EINVAL; 954 goto out; 955 } 956 957 /* Free up any pending old buffers before queueing new ones. */ 958 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { 959 if (likely(is_xdp_frame(ptr))) { 960 struct xdp_frame *frame = ptr_to_xdp(ptr); 961 962 bytes += xdp_get_frame_len(frame); 963 xdp_return_frame(frame); 964 } else { 965 struct sk_buff *skb = ptr; 966 967 bytes += skb->len; 968 napi_consume_skb(skb, false); 969 } 970 packets++; 971 } 972 973 for (i = 0; i < n; i++) { 974 struct xdp_frame *xdpf = frames[i]; 975 976 if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) 977 break; 978 nxmit++; 979 } 980 ret = nxmit; 981 982 if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) 983 check_sq_full_and_disable(vi, dev, sq); 984 985 if (flags & XDP_XMIT_FLUSH) { 986 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) 987 kicks = 1; 988 } 989 out: 990 u64_stats_update_begin(&sq->stats.syncp); 991 u64_stats_add(&sq->stats.bytes, bytes); 992 u64_stats_add(&sq->stats.packets, packets); 993 u64_stats_add(&sq->stats.xdp_tx, n); 994 u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit); 995 u64_stats_add(&sq->stats.kicks, kicks); 996 u64_stats_update_end(&sq->stats.syncp); 997 998 virtnet_xdp_put_sq(vi, sq); 999 return ret; 1000 } 1001 1002 static void put_xdp_frags(struct xdp_buff *xdp) 1003 { 1004 struct skb_shared_info *shinfo; 1005 struct page *xdp_page; 1006 int i; 1007 1008 if (xdp_buff_has_frags(xdp)) { 1009 shinfo = xdp_get_shared_info_from_buff(xdp); 1010 for (i = 0; i < shinfo->nr_frags; i++) { 1011 xdp_page = skb_frag_page(&shinfo->frags[i]); 1012 put_page(xdp_page); 1013 } 1014 } 1015 } 1016 1017 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 1018 struct net_device *dev, 1019 unsigned int *xdp_xmit, 1020 struct virtnet_rq_stats *stats) 1021 { 1022 struct xdp_frame *xdpf; 1023 int err; 1024 u32 act; 1025 1026 act = bpf_prog_run_xdp(xdp_prog, xdp); 1027 u64_stats_inc(&stats->xdp_packets); 1028 1029 switch (act) { 1030 case XDP_PASS: 1031 return act; 1032 1033 case XDP_TX: 1034 u64_stats_inc(&stats->xdp_tx); 1035 xdpf = xdp_convert_buff_to_frame(xdp); 1036 if (unlikely(!xdpf)) { 1037 netdev_dbg(dev, "convert buff to frame failed for xdp\n"); 1038 return XDP_DROP; 1039 } 1040 1041 err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); 1042 if (unlikely(!err)) { 1043 xdp_return_frame_rx_napi(xdpf); 1044 } else if (unlikely(err < 0)) { 1045 trace_xdp_exception(dev, xdp_prog, act); 1046 return XDP_DROP; 1047 } 1048 *xdp_xmit |= VIRTIO_XDP_TX; 1049 return act; 1050 1051 case XDP_REDIRECT: 1052 u64_stats_inc(&stats->xdp_redirects); 1053 err = xdp_do_redirect(dev, xdp, xdp_prog); 1054 if (err) 1055 return XDP_DROP; 1056 1057 *xdp_xmit |= VIRTIO_XDP_REDIR; 1058 return act; 1059 1060 default: 1061 bpf_warn_invalid_xdp_action(dev, xdp_prog, act); 1062 fallthrough; 1063 case XDP_ABORTED: 1064 trace_xdp_exception(dev, xdp_prog, act); 1065 fallthrough; 1066 case XDP_DROP: 1067 return XDP_DROP; 1068 } 1069 } 1070 1071 static unsigned int virtnet_get_headroom(struct virtnet_info *vi) 1072 { 1073 return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0; 1074 } 1075 1076 /* We copy the packet for XDP in the following cases: 1077 * 1078 * 1) Packet is scattered across multiple rx buffers. 1079 * 2) Headroom space is insufficient. 
1080 * 1081 * This is inefficient but it's a temporary condition that 1082 * we hit right after XDP is enabled and until queue is refilled 1083 * with large buffers with sufficient headroom - so it should affect 1084 * at most queue size packets. 1085 * Afterwards, the conditions to enable 1086 * XDP should preclude the underlying device from sending packets 1087 * across multiple buffers (num_buf > 1), and we make sure buffers 1088 * have enough headroom. 1089 */ 1090 static struct page *xdp_linearize_page(struct receive_queue *rq, 1091 int *num_buf, 1092 struct page *p, 1093 int offset, 1094 int page_off, 1095 unsigned int *len) 1096 { 1097 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1098 struct page *page; 1099 1100 if (page_off + *len + tailroom > PAGE_SIZE) 1101 return NULL; 1102 1103 page = alloc_page(GFP_ATOMIC); 1104 if (!page) 1105 return NULL; 1106 1107 memcpy(page_address(page) + page_off, page_address(p) + offset, *len); 1108 page_off += *len; 1109 1110 while (--*num_buf) { 1111 unsigned int buflen; 1112 void *buf; 1113 int off; 1114 1115 buf = virtnet_rq_get_buf(rq, &buflen, NULL); 1116 if (unlikely(!buf)) 1117 goto err_buf; 1118 1119 p = virt_to_head_page(buf); 1120 off = buf - page_address(p); 1121 1122 /* guard against a misconfigured or uncooperative backend that 1123 * is sending packet larger than the MTU. 1124 */ 1125 if ((page_off + buflen + tailroom) > PAGE_SIZE) { 1126 put_page(p); 1127 goto err_buf; 1128 } 1129 1130 memcpy(page_address(page) + page_off, 1131 page_address(p) + off, buflen); 1132 page_off += buflen; 1133 put_page(p); 1134 } 1135 1136 /* Headroom does not contribute to packet length */ 1137 *len = page_off - VIRTIO_XDP_HEADROOM; 1138 return page; 1139 err_buf: 1140 __free_pages(page, 0); 1141 return NULL; 1142 } 1143 1144 static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi, 1145 unsigned int xdp_headroom, 1146 void *buf, 1147 unsigned int len) 1148 { 1149 unsigned int header_offset; 1150 unsigned int headroom; 1151 unsigned int buflen; 1152 struct sk_buff *skb; 1153 1154 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1155 headroom = vi->hdr_len + header_offset; 1156 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1157 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1158 1159 skb = virtnet_build_skb(buf, buflen, headroom, len); 1160 if (unlikely(!skb)) 1161 return NULL; 1162 1163 buf += header_offset; 1164 memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len); 1165 1166 return skb; 1167 } 1168 1169 static struct sk_buff *receive_small_xdp(struct net_device *dev, 1170 struct virtnet_info *vi, 1171 struct receive_queue *rq, 1172 struct bpf_prog *xdp_prog, 1173 void *buf, 1174 unsigned int xdp_headroom, 1175 unsigned int len, 1176 unsigned int *xdp_xmit, 1177 struct virtnet_rq_stats *stats) 1178 { 1179 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom; 1180 unsigned int headroom = vi->hdr_len + header_offset; 1181 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; 1182 struct page *page = virt_to_head_page(buf); 1183 struct page *xdp_page; 1184 unsigned int buflen; 1185 struct xdp_buff xdp; 1186 struct sk_buff *skb; 1187 unsigned int metasize = 0; 1188 u32 act; 1189 1190 if (unlikely(hdr->hdr.gso_type)) 1191 goto err_xdp; 1192 1193 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1194 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1195 1196 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { 1197 int offset = buf - page_address(page) + header_offset; 1198 unsigned int tlen = len + 
vi->hdr_len; 1199 int num_buf = 1; 1200 1201 xdp_headroom = virtnet_get_headroom(vi); 1202 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1203 headroom = vi->hdr_len + header_offset; 1204 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1205 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1206 xdp_page = xdp_linearize_page(rq, &num_buf, page, 1207 offset, header_offset, 1208 &tlen); 1209 if (!xdp_page) 1210 goto err_xdp; 1211 1212 buf = page_address(xdp_page); 1213 put_page(page); 1214 page = xdp_page; 1215 } 1216 1217 xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); 1218 xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len, 1219 xdp_headroom, len, true); 1220 1221 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 1222 1223 switch (act) { 1224 case XDP_PASS: 1225 /* Recalculate length in case bpf program changed it */ 1226 len = xdp.data_end - xdp.data; 1227 metasize = xdp.data - xdp.data_meta; 1228 break; 1229 1230 case XDP_TX: 1231 case XDP_REDIRECT: 1232 goto xdp_xmit; 1233 1234 default: 1235 goto err_xdp; 1236 } 1237 1238 skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len); 1239 if (unlikely(!skb)) 1240 goto err; 1241 1242 if (metasize) 1243 skb_metadata_set(skb, metasize); 1244 1245 return skb; 1246 1247 err_xdp: 1248 u64_stats_inc(&stats->xdp_drops); 1249 err: 1250 u64_stats_inc(&stats->drops); 1251 put_page(page); 1252 xdp_xmit: 1253 return NULL; 1254 } 1255 1256 static struct sk_buff *receive_small(struct net_device *dev, 1257 struct virtnet_info *vi, 1258 struct receive_queue *rq, 1259 void *buf, void *ctx, 1260 unsigned int len, 1261 unsigned int *xdp_xmit, 1262 struct virtnet_rq_stats *stats) 1263 { 1264 unsigned int xdp_headroom = (unsigned long)ctx; 1265 struct page *page = virt_to_head_page(buf); 1266 struct sk_buff *skb; 1267 1268 len -= vi->hdr_len; 1269 u64_stats_add(&stats->bytes, len); 1270 1271 if (unlikely(len > GOOD_PACKET_LEN)) { 1272 pr_debug("%s: rx error: len %u exceeds max size %d\n", 1273 dev->name, len, GOOD_PACKET_LEN); 1274 DEV_STATS_INC(dev, rx_length_errors); 1275 goto err; 1276 } 1277 1278 if (unlikely(vi->xdp_enabled)) { 1279 struct bpf_prog *xdp_prog; 1280 1281 rcu_read_lock(); 1282 xdp_prog = rcu_dereference(rq->xdp_prog); 1283 if (xdp_prog) { 1284 skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf, 1285 xdp_headroom, len, xdp_xmit, 1286 stats); 1287 rcu_read_unlock(); 1288 return skb; 1289 } 1290 rcu_read_unlock(); 1291 } 1292 1293 skb = receive_small_build_skb(vi, xdp_headroom, buf, len); 1294 if (likely(skb)) 1295 return skb; 1296 1297 err: 1298 u64_stats_inc(&stats->drops); 1299 put_page(page); 1300 return NULL; 1301 } 1302 1303 static struct sk_buff *receive_big(struct net_device *dev, 1304 struct virtnet_info *vi, 1305 struct receive_queue *rq, 1306 void *buf, 1307 unsigned int len, 1308 struct virtnet_rq_stats *stats) 1309 { 1310 struct page *page = buf; 1311 struct sk_buff *skb = 1312 page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); 1313 1314 u64_stats_add(&stats->bytes, len - vi->hdr_len); 1315 if (unlikely(!skb)) 1316 goto err; 1317 1318 return skb; 1319 1320 err: 1321 u64_stats_inc(&stats->drops); 1322 give_pages(rq, page); 1323 return NULL; 1324 } 1325 1326 static void mergeable_buf_free(struct receive_queue *rq, int num_buf, 1327 struct net_device *dev, 1328 struct virtnet_rq_stats *stats) 1329 { 1330 struct page *page; 1331 void *buf; 1332 int len; 1333 1334 while (num_buf-- > 1) { 1335 buf = virtnet_rq_get_buf(rq, &len, NULL); 1336 if (unlikely(!buf)) { 1337 pr_debug("%s: rx error: %d buffers missing\n", 1338 
dev->name, num_buf); 1339 DEV_STATS_INC(dev, rx_length_errors); 1340 break; 1341 } 1342 u64_stats_add(&stats->bytes, len); 1343 page = virt_to_head_page(buf); 1344 put_page(page); 1345 } 1346 } 1347 1348 /* Why not use xdp_build_skb_from_frame() ? 1349 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in 1350 * virtio-net there are 2 points that do not match its requirements: 1351 * 1. The size of the prefilled buffer is not fixed before xdp is set. 1352 * 2. xdp_build_skb_from_frame() does more checks that we don't need, 1353 * like eth_type_trans() (which virtio-net does in receive_buf()). 1354 */ 1355 static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, 1356 struct virtnet_info *vi, 1357 struct xdp_buff *xdp, 1358 unsigned int xdp_frags_truesz) 1359 { 1360 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 1361 unsigned int headroom, data_len; 1362 struct sk_buff *skb; 1363 int metasize; 1364 u8 nr_frags; 1365 1366 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 1367 pr_debug("Error building skb as missing reserved tailroom for xdp"); 1368 return NULL; 1369 } 1370 1371 if (unlikely(xdp_buff_has_frags(xdp))) 1372 nr_frags = sinfo->nr_frags; 1373 1374 skb = build_skb(xdp->data_hard_start, xdp->frame_sz); 1375 if (unlikely(!skb)) 1376 return NULL; 1377 1378 headroom = xdp->data - xdp->data_hard_start; 1379 data_len = xdp->data_end - xdp->data; 1380 skb_reserve(skb, headroom); 1381 __skb_put(skb, data_len); 1382 1383 metasize = xdp->data - xdp->data_meta; 1384 metasize = metasize > 0 ? metasize : 0; 1385 if (metasize) 1386 skb_metadata_set(skb, metasize); 1387 1388 if (unlikely(xdp_buff_has_frags(xdp))) 1389 xdp_update_skb_shared_info(skb, nr_frags, 1390 sinfo->xdp_frags_size, 1391 xdp_frags_truesz, 1392 xdp_buff_is_frag_pfmemalloc(xdp)); 1393 1394 return skb; 1395 } 1396 1397 /* TODO: build xdp in big mode */ 1398 static int virtnet_build_xdp_buff_mrg(struct net_device *dev, 1399 struct virtnet_info *vi, 1400 struct receive_queue *rq, 1401 struct xdp_buff *xdp, 1402 void *buf, 1403 unsigned int len, 1404 unsigned int frame_sz, 1405 int *num_buf, 1406 unsigned int *xdp_frags_truesize, 1407 struct virtnet_rq_stats *stats) 1408 { 1409 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 1410 unsigned int headroom, tailroom, room; 1411 unsigned int truesize, cur_frag_size; 1412 struct skb_shared_info *shinfo; 1413 unsigned int xdp_frags_truesz = 0; 1414 struct page *page; 1415 skb_frag_t *frag; 1416 int offset; 1417 void *ctx; 1418 1419 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 1420 xdp_prepare_buff(xdp, buf - VIRTIO_XDP_HEADROOM, 1421 VIRTIO_XDP_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); 1422 1423 if (!*num_buf) 1424 return 0; 1425 1426 if (*num_buf > 1) { 1427 /* If we want to build multi-buffer xdp, we need 1428 * to specify that the flags of xdp_buff have the 1429 * XDP_FLAGS_HAS_FRAG bit. 
1430 */ 1431 if (!xdp_buff_has_frags(xdp)) 1432 xdp_buff_set_frags_flag(xdp); 1433 1434 shinfo = xdp_get_shared_info_from_buff(xdp); 1435 shinfo->nr_frags = 0; 1436 shinfo->xdp_frags_size = 0; 1437 } 1438 1439 if (*num_buf > MAX_SKB_FRAGS + 1) 1440 return -EINVAL; 1441 1442 while (--*num_buf > 0) { 1443 buf = virtnet_rq_get_buf(rq, &len, &ctx); 1444 if (unlikely(!buf)) { 1445 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1446 dev->name, *num_buf, 1447 virtio16_to_cpu(vi->vdev, hdr->num_buffers)); 1448 DEV_STATS_INC(dev, rx_length_errors); 1449 goto err; 1450 } 1451 1452 u64_stats_add(&stats->bytes, len); 1453 page = virt_to_head_page(buf); 1454 offset = buf - page_address(page); 1455 1456 truesize = mergeable_ctx_to_truesize(ctx); 1457 headroom = mergeable_ctx_to_headroom(ctx); 1458 tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 1459 room = SKB_DATA_ALIGN(headroom + tailroom); 1460 1461 cur_frag_size = truesize; 1462 xdp_frags_truesz += cur_frag_size; 1463 if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { 1464 put_page(page); 1465 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 1466 dev->name, len, (unsigned long)(truesize - room)); 1467 DEV_STATS_INC(dev, rx_length_errors); 1468 goto err; 1469 } 1470 1471 frag = &shinfo->frags[shinfo->nr_frags++]; 1472 skb_frag_fill_page_desc(frag, page, offset, len); 1473 if (page_is_pfmemalloc(page)) 1474 xdp_buff_set_frag_pfmemalloc(xdp); 1475 1476 shinfo->xdp_frags_size += len; 1477 } 1478 1479 *xdp_frags_truesize = xdp_frags_truesz; 1480 return 0; 1481 1482 err: 1483 put_xdp_frags(xdp); 1484 return -EINVAL; 1485 } 1486 1487 static void *mergeable_xdp_get_buf(struct virtnet_info *vi, 1488 struct receive_queue *rq, 1489 struct bpf_prog *xdp_prog, 1490 void *ctx, 1491 unsigned int *frame_sz, 1492 int *num_buf, 1493 struct page **page, 1494 int offset, 1495 unsigned int *len, 1496 struct virtio_net_hdr_mrg_rxbuf *hdr) 1497 { 1498 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 1499 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 1500 struct page *xdp_page; 1501 unsigned int xdp_room; 1502 1503 /* Transient failure which in theory could occur if 1504 * in-flight packets from before XDP was enabled reach 1505 * the receive path after XDP is loaded. 1506 */ 1507 if (unlikely(hdr->hdr.gso_type)) 1508 return NULL; 1509 1510 /* Now XDP core assumes frag size is PAGE_SIZE, but buffers 1511 * with headroom may add hole in truesize, which 1512 * make their length exceed PAGE_SIZE. So we disabled the 1513 * hole mechanism for xdp. See add_recvbuf_mergeable(). 1514 */ 1515 *frame_sz = truesize; 1516 1517 if (likely(headroom >= virtnet_get_headroom(vi) && 1518 (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) { 1519 return page_address(*page) + offset; 1520 } 1521 1522 /* This happens when headroom is not enough because 1523 * of the buffer was prefilled before XDP is set. 1524 * This should only happen for the first several packets. 1525 * In fact, vq reset can be used here to help us clean up 1526 * the prefilled buffers, but many existing devices do not 1527 * support it, and we don't want to bother users who are 1528 * using xdp normally. 
1529 */ 1530 if (!xdp_prog->aux->xdp_has_frags) { 1531 /* linearize data for XDP */ 1532 xdp_page = xdp_linearize_page(rq, num_buf, 1533 *page, offset, 1534 VIRTIO_XDP_HEADROOM, 1535 len); 1536 if (!xdp_page) 1537 return NULL; 1538 } else { 1539 xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + 1540 sizeof(struct skb_shared_info)); 1541 if (*len + xdp_room > PAGE_SIZE) 1542 return NULL; 1543 1544 xdp_page = alloc_page(GFP_ATOMIC); 1545 if (!xdp_page) 1546 return NULL; 1547 1548 memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM, 1549 page_address(*page) + offset, *len); 1550 } 1551 1552 *frame_sz = PAGE_SIZE; 1553 1554 put_page(*page); 1555 1556 *page = xdp_page; 1557 1558 return page_address(*page) + VIRTIO_XDP_HEADROOM; 1559 } 1560 1561 static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, 1562 struct virtnet_info *vi, 1563 struct receive_queue *rq, 1564 struct bpf_prog *xdp_prog, 1565 void *buf, 1566 void *ctx, 1567 unsigned int len, 1568 unsigned int *xdp_xmit, 1569 struct virtnet_rq_stats *stats) 1570 { 1571 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 1572 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 1573 struct page *page = virt_to_head_page(buf); 1574 int offset = buf - page_address(page); 1575 unsigned int xdp_frags_truesz = 0; 1576 struct sk_buff *head_skb; 1577 unsigned int frame_sz; 1578 struct xdp_buff xdp; 1579 void *data; 1580 u32 act; 1581 int err; 1582 1583 data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page, 1584 offset, &len, hdr); 1585 if (unlikely(!data)) 1586 goto err_xdp; 1587 1588 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, 1589 &num_buf, &xdp_frags_truesz, stats); 1590 if (unlikely(err)) 1591 goto err_xdp; 1592 1593 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 1594 1595 switch (act) { 1596 case XDP_PASS: 1597 head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); 1598 if (unlikely(!head_skb)) 1599 break; 1600 return head_skb; 1601 1602 case XDP_TX: 1603 case XDP_REDIRECT: 1604 return NULL; 1605 1606 default: 1607 break; 1608 } 1609 1610 put_xdp_frags(&xdp); 1611 1612 err_xdp: 1613 put_page(page); 1614 mergeable_buf_free(rq, num_buf, dev, stats); 1615 1616 u64_stats_inc(&stats->xdp_drops); 1617 u64_stats_inc(&stats->drops); 1618 return NULL; 1619 } 1620 1621 static struct sk_buff *receive_mergeable(struct net_device *dev, 1622 struct virtnet_info *vi, 1623 struct receive_queue *rq, 1624 void *buf, 1625 void *ctx, 1626 unsigned int len, 1627 unsigned int *xdp_xmit, 1628 struct virtnet_rq_stats *stats) 1629 { 1630 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 1631 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 1632 struct page *page = virt_to_head_page(buf); 1633 int offset = buf - page_address(page); 1634 struct sk_buff *head_skb, *curr_skb; 1635 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 1636 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 1637 unsigned int tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 1638 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 1639 1640 head_skb = NULL; 1641 u64_stats_add(&stats->bytes, len - vi->hdr_len); 1642 1643 if (unlikely(len > truesize - room)) { 1644 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 1645 dev->name, len, (unsigned long)(truesize - room)); 1646 DEV_STATS_INC(dev, rx_length_errors); 1647 goto err_skb; 1648 } 1649 1650 if (unlikely(vi->xdp_enabled)) { 1651 struct bpf_prog *xdp_prog; 1652 1653 rcu_read_lock(); 1654 xdp_prog = rcu_dereference(rq->xdp_prog); 1655 if (xdp_prog) { 1656 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, 1657 len, xdp_xmit, stats); 1658 rcu_read_unlock(); 1659 return head_skb; 1660 } 1661 rcu_read_unlock(); 1662 } 1663 1664 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); 1665 curr_skb = head_skb; 1666 1667 if (unlikely(!curr_skb)) 1668 goto err_skb; 1669 while (--num_buf) { 1670 int num_skb_frags; 1671 1672 buf = virtnet_rq_get_buf(rq, &len, &ctx); 1673 if (unlikely(!buf)) { 1674 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1675 dev->name, num_buf, 1676 virtio16_to_cpu(vi->vdev, 1677 hdr->num_buffers)); 1678 DEV_STATS_INC(dev, rx_length_errors); 1679 goto err_buf; 1680 } 1681 1682 u64_stats_add(&stats->bytes, len); 1683 page = virt_to_head_page(buf); 1684 1685 truesize = mergeable_ctx_to_truesize(ctx); 1686 headroom = mergeable_ctx_to_headroom(ctx); 1687 tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 1688 room = SKB_DATA_ALIGN(headroom + tailroom); 1689 if (unlikely(len > truesize - room)) { 1690 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 1691 dev->name, len, (unsigned long)(truesize - room)); 1692 DEV_STATS_INC(dev, rx_length_errors); 1693 goto err_skb; 1694 } 1695 1696 num_skb_frags = skb_shinfo(curr_skb)->nr_frags; 1697 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { 1698 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); 1699 1700 if (unlikely(!nskb)) 1701 goto err_skb; 1702 if (curr_skb == head_skb) 1703 skb_shinfo(curr_skb)->frag_list = nskb; 1704 else 1705 curr_skb->next = nskb; 1706 curr_skb = nskb; 1707 head_skb->truesize += nskb->truesize; 1708 num_skb_frags = 0; 1709 } 1710 if (curr_skb != head_skb) { 1711 head_skb->data_len += len; 1712 head_skb->len += len; 1713 head_skb->truesize += truesize; 1714 } 1715 offset = buf - page_address(page); 1716 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { 1717 put_page(page); 1718 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, 1719 len, truesize); 1720 } else { 1721 skb_add_rx_frag(curr_skb, num_skb_frags, page, 1722 offset, len, truesize); 1723 } 1724 } 1725 1726 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); 1727 return head_skb; 1728 1729 err_skb: 1730 put_page(page); 1731 mergeable_buf_free(rq, num_buf, dev, stats); 1732 1733 err_buf: 1734 u64_stats_inc(&stats->drops); 1735 dev_kfree_skb(head_skb); 1736 return NULL; 1737 } 1738 1739 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, 1740 struct sk_buff *skb) 1741 { 1742 enum pkt_hash_types rss_hash_type; 1743 1744 if (!hdr_hash || !skb) 1745 return; 1746 1747 switch (__le16_to_cpu(hdr_hash->hash_report)) { 1748 case VIRTIO_NET_HASH_REPORT_TCPv4: 1749 case VIRTIO_NET_HASH_REPORT_UDPv4: 1750 case VIRTIO_NET_HASH_REPORT_TCPv6: 1751 case VIRTIO_NET_HASH_REPORT_UDPv6: 1752 case VIRTIO_NET_HASH_REPORT_TCPv6_EX: 1753 case VIRTIO_NET_HASH_REPORT_UDPv6_EX: 1754 rss_hash_type = PKT_HASH_TYPE_L4; 1755 break; 1756 case VIRTIO_NET_HASH_REPORT_IPv4: 
1757 case VIRTIO_NET_HASH_REPORT_IPv6: 1758 case VIRTIO_NET_HASH_REPORT_IPv6_EX: 1759 rss_hash_type = PKT_HASH_TYPE_L3; 1760 break; 1761 case VIRTIO_NET_HASH_REPORT_NONE: 1762 default: 1763 rss_hash_type = PKT_HASH_TYPE_NONE; 1764 } 1765 skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); 1766 } 1767 1768 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 1769 void *buf, unsigned int len, void **ctx, 1770 unsigned int *xdp_xmit, 1771 struct virtnet_rq_stats *stats) 1772 { 1773 struct net_device *dev = vi->dev; 1774 struct sk_buff *skb; 1775 struct virtio_net_common_hdr *hdr; 1776 1777 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 1778 pr_debug("%s: short packet %i\n", dev->name, len); 1779 DEV_STATS_INC(dev, rx_length_errors); 1780 virtnet_rq_free_buf(vi, rq, buf); 1781 return; 1782 } 1783 1784 if (vi->mergeable_rx_bufs) 1785 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 1786 stats); 1787 else if (vi->big_packets) 1788 skb = receive_big(dev, vi, rq, buf, len, stats); 1789 else 1790 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 1791 1792 if (unlikely(!skb)) 1793 return; 1794 1795 hdr = skb_vnet_common_hdr(skb); 1796 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) 1797 virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); 1798 1799 if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) 1800 skb->ip_summed = CHECKSUM_UNNECESSARY; 1801 1802 if (virtio_net_hdr_to_skb(skb, &hdr->hdr, 1803 virtio_is_little_endian(vi->vdev))) { 1804 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n", 1805 dev->name, hdr->hdr.gso_type, 1806 hdr->hdr.gso_size); 1807 goto frame_err; 1808 } 1809 1810 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 1811 skb->protocol = eth_type_trans(skb, dev); 1812 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 1813 ntohs(skb->protocol), skb->len, skb->pkt_type); 1814 1815 napi_gro_receive(&rq->napi, skb); 1816 return; 1817 1818 frame_err: 1819 DEV_STATS_INC(dev, rx_frame_errors); 1820 dev_kfree_skb(skb); 1821 } 1822 1823 /* Unlike mergeable buffers, all buffers are allocated to the 1824 * same size, except for the headroom. For this reason we do 1825 * not need to use mergeable_len_to_ctx here - it is enough 1826 * to store the headroom as the context ignoring the truesize. 
1827 */ 1828 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, 1829 gfp_t gfp) 1830 { 1831 char *buf; 1832 unsigned int xdp_headroom = virtnet_get_headroom(vi); 1833 void *ctx = (void *)(unsigned long)xdp_headroom; 1834 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; 1835 int err; 1836 1837 len = SKB_DATA_ALIGN(len) + 1838 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1839 1840 buf = virtnet_rq_alloc(rq, len, gfp); 1841 if (unlikely(!buf)) 1842 return -ENOMEM; 1843 1844 virtnet_rq_init_one_sg(rq, buf + VIRTNET_RX_PAD + xdp_headroom, 1845 vi->hdr_len + GOOD_PACKET_LEN); 1846 1847 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 1848 if (err < 0) { 1849 if (rq->do_dma) 1850 virtnet_rq_unmap(rq, buf, 0); 1851 put_page(virt_to_head_page(buf)); 1852 } 1853 1854 return err; 1855 } 1856 1857 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, 1858 gfp_t gfp) 1859 { 1860 struct page *first, *list = NULL; 1861 char *p; 1862 int i, err, offset; 1863 1864 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); 1865 1866 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ 1867 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { 1868 first = get_a_page(rq, gfp); 1869 if (!first) { 1870 if (list) 1871 give_pages(rq, list); 1872 return -ENOMEM; 1873 } 1874 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); 1875 1876 /* chain new page in list head to match sg */ 1877 first->private = (unsigned long)list; 1878 list = first; 1879 } 1880 1881 first = get_a_page(rq, gfp); 1882 if (!first) { 1883 give_pages(rq, list); 1884 return -ENOMEM; 1885 } 1886 p = page_address(first); 1887 1888 /* rq->sg[0], rq->sg[1] share the same page */ 1889 /* a separated rq->sg[0] for header - required in case !any_header_sg */ 1890 sg_set_buf(&rq->sg[0], p, vi->hdr_len); 1891 1892 /* rq->sg[1] for data packet, from offset */ 1893 offset = sizeof(struct padded_vnet_hdr); 1894 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); 1895 1896 /* chain first in list head */ 1897 first->private = (unsigned long)list; 1898 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, 1899 first, gfp); 1900 if (err < 0) 1901 give_pages(rq, first); 1902 1903 return err; 1904 } 1905 1906 static unsigned int get_mergeable_buf_len(struct receive_queue *rq, 1907 struct ewma_pkt_len *avg_pkt_len, 1908 unsigned int room) 1909 { 1910 struct virtnet_info *vi = rq->vq->vdev->priv; 1911 const size_t hdr_len = vi->hdr_len; 1912 unsigned int len; 1913 1914 if (room) 1915 return PAGE_SIZE - room; 1916 1917 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), 1918 rq->min_buf_len, PAGE_SIZE - hdr_len); 1919 1920 return ALIGN(len, L1_CACHE_BYTES); 1921 } 1922 1923 static int add_recvbuf_mergeable(struct virtnet_info *vi, 1924 struct receive_queue *rq, gfp_t gfp) 1925 { 1926 struct page_frag *alloc_frag = &rq->alloc_frag; 1927 unsigned int headroom = virtnet_get_headroom(vi); 1928 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 1929 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 1930 unsigned int len, hole; 1931 void *ctx; 1932 char *buf; 1933 int err; 1934 1935 /* Extra tailroom is needed to satisfy XDP's assumption. This 1936 * means rx frags coalescing won't work, but consider we've 1937 * disabled GSO for XDP, it won't be a big issue. 
1938 */ 1939 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); 1940 1941 buf = virtnet_rq_alloc(rq, len + room, gfp); 1942 if (unlikely(!buf)) 1943 return -ENOMEM; 1944 1945 buf += headroom; /* advance address leaving hole at front of pkt */ 1946 hole = alloc_frag->size - alloc_frag->offset; 1947 if (hole < len + room) { 1948 /* To avoid internal fragmentation, if there is very likely not 1949 * enough space for another buffer, add the remaining space to 1950 * the current buffer. 1951 * XDP core assumes that frame_size of xdp_buff and the length 1952 * of the frag are PAGE_SIZE, so we disable the hole mechanism. 1953 */ 1954 if (!headroom) 1955 len += hole; 1956 alloc_frag->offset += hole; 1957 } 1958 1959 virtnet_rq_init_one_sg(rq, buf, len); 1960 1961 ctx = mergeable_len_to_ctx(len + room, headroom); 1962 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 1963 if (err < 0) { 1964 if (rq->do_dma) 1965 virtnet_rq_unmap(rq, buf, 0); 1966 put_page(virt_to_head_page(buf)); 1967 } 1968 1969 return err; 1970 } 1971 1972 /* 1973 * Returns false if we couldn't fill entirely (OOM). 1974 * 1975 * Normally run in the receive path, but can also be run from ndo_open 1976 * before we're receiving packets, or from refill_work which is 1977 * careful to disable receiving (using napi_disable). 1978 */ 1979 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, 1980 gfp_t gfp) 1981 { 1982 int err; 1983 bool oom; 1984 1985 do { 1986 if (vi->mergeable_rx_bufs) 1987 err = add_recvbuf_mergeable(vi, rq, gfp); 1988 else if (vi->big_packets) 1989 err = add_recvbuf_big(vi, rq, gfp); 1990 else 1991 err = add_recvbuf_small(vi, rq, gfp); 1992 1993 oom = err == -ENOMEM; 1994 if (err) 1995 break; 1996 } while (rq->vq->num_free); 1997 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { 1998 unsigned long flags; 1999 2000 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); 2001 u64_stats_inc(&rq->stats.kicks); 2002 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); 2003 } 2004 2005 return !oom; 2006 } 2007 2008 static void skb_recv_done(struct virtqueue *rvq) 2009 { 2010 struct virtnet_info *vi = rvq->vdev->priv; 2011 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; 2012 2013 virtqueue_napi_schedule(&rq->napi, rvq); 2014 } 2015 2016 static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) 2017 { 2018 napi_enable(napi); 2019 2020 /* If all buffers were filled by other side before we napi_enabled, we 2021 * won't get another interrupt, so process any outstanding packets now. 2022 * Call local_bh_enable after to trigger softIRQ processing. 2023 */ 2024 local_bh_disable(); 2025 virtqueue_napi_schedule(napi, vq); 2026 local_bh_enable(); 2027 } 2028 2029 static void virtnet_napi_tx_enable(struct virtnet_info *vi, 2030 struct virtqueue *vq, 2031 struct napi_struct *napi) 2032 { 2033 if (!napi->weight) 2034 return; 2035 2036 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only 2037 * enable the feature if this is likely affine with the transmit path. 
2038 */ 2039 if (!vi->affinity_hint_set) { 2040 napi->weight = 0; 2041 return; 2042 } 2043 2044 return virtnet_napi_enable(vq, napi); 2045 } 2046 2047 static void virtnet_napi_tx_disable(struct napi_struct *napi) 2048 { 2049 if (napi->weight) 2050 napi_disable(napi); 2051 } 2052 2053 static void refill_work(struct work_struct *work) 2054 { 2055 struct virtnet_info *vi = 2056 container_of(work, struct virtnet_info, refill.work); 2057 bool still_empty; 2058 int i; 2059 2060 for (i = 0; i < vi->curr_queue_pairs; i++) { 2061 struct receive_queue *rq = &vi->rq[i]; 2062 2063 napi_disable(&rq->napi); 2064 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); 2065 virtnet_napi_enable(rq->vq, &rq->napi); 2066 2067 /* In theory, this can happen: if we don't get any buffers in 2068 * we will *never* try to fill again. 2069 */ 2070 if (still_empty) 2071 schedule_delayed_work(&vi->refill, HZ/2); 2072 } 2073 } 2074 2075 static int virtnet_receive(struct receive_queue *rq, int budget, 2076 unsigned int *xdp_xmit) 2077 { 2078 struct virtnet_info *vi = rq->vq->vdev->priv; 2079 struct virtnet_rq_stats stats = {}; 2080 unsigned int len; 2081 int packets = 0; 2082 void *buf; 2083 int i; 2084 2085 if (!vi->big_packets || vi->mergeable_rx_bufs) { 2086 void *ctx; 2087 2088 while (packets < budget && 2089 (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { 2090 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats); 2091 packets++; 2092 } 2093 } else { 2094 while (packets < budget && 2095 (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) { 2096 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats); 2097 packets++; 2098 } 2099 } 2100 2101 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 2102 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { 2103 spin_lock(&vi->refill_lock); 2104 if (vi->refill_enabled) 2105 schedule_delayed_work(&vi->refill, 0); 2106 spin_unlock(&vi->refill_lock); 2107 } 2108 } 2109 2110 u64_stats_set(&stats.packets, packets); 2111 u64_stats_update_begin(&rq->stats.syncp); 2112 for (i = 0; i < VIRTNET_RQ_STATS_LEN; i++) { 2113 size_t offset = virtnet_rq_stats_desc[i].offset; 2114 u64_stats_t *item, *src; 2115 2116 item = (u64_stats_t *)((u8 *)&rq->stats + offset); 2117 src = (u64_stats_t *)((u8 *)&stats + offset); 2118 u64_stats_add(item, u64_stats_read(src)); 2119 } 2120 u64_stats_update_end(&rq->stats.syncp); 2121 2122 return packets; 2123 } 2124 2125 static void virtnet_poll_cleantx(struct receive_queue *rq) 2126 { 2127 struct virtnet_info *vi = rq->vq->vdev->priv; 2128 unsigned int index = vq2rxq(rq->vq); 2129 struct send_queue *sq = &vi->sq[index]; 2130 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 2131 2132 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 2133 return; 2134 2135 if (__netif_tx_trylock(txq)) { 2136 if (sq->reset) { 2137 __netif_tx_unlock(txq); 2138 return; 2139 } 2140 2141 do { 2142 virtqueue_disable_cb(sq->vq); 2143 free_old_xmit_skbs(sq, true); 2144 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 2145 2146 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) 2147 netif_tx_wake_queue(txq); 2148 2149 __netif_tx_unlock(txq); 2150 } 2151 } 2152 2153 static int virtnet_poll(struct napi_struct *napi, int budget) 2154 { 2155 struct receive_queue *rq = 2156 container_of(napi, struct receive_queue, napi); 2157 struct virtnet_info *vi = rq->vq->vdev->priv; 2158 struct send_queue *sq; 2159 unsigned int received; 2160 unsigned int xdp_xmit = 0; 2161 2162 virtnet_poll_cleantx(rq); 2163 2164 received = virtnet_receive(rq, budget, 
&xdp_xmit); 2165 2166 if (xdp_xmit & VIRTIO_XDP_REDIR) 2167 xdp_do_flush(); 2168 2169 /* Out of packets? */ 2170 if (received < budget) 2171 virtqueue_napi_complete(napi, rq->vq, received); 2172 2173 if (xdp_xmit & VIRTIO_XDP_TX) { 2174 sq = virtnet_xdp_get_sq(vi); 2175 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 2176 u64_stats_update_begin(&sq->stats.syncp); 2177 u64_stats_inc(&sq->stats.kicks); 2178 u64_stats_update_end(&sq->stats.syncp); 2179 } 2180 virtnet_xdp_put_sq(vi, sq); 2181 } 2182 2183 return received; 2184 } 2185 2186 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) 2187 { 2188 virtnet_napi_tx_disable(&vi->sq[qp_index].napi); 2189 napi_disable(&vi->rq[qp_index].napi); 2190 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 2191 } 2192 2193 static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) 2194 { 2195 struct net_device *dev = vi->dev; 2196 int err; 2197 2198 err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, 2199 vi->rq[qp_index].napi.napi_id); 2200 if (err < 0) 2201 return err; 2202 2203 err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, 2204 MEM_TYPE_PAGE_SHARED, NULL); 2205 if (err < 0) 2206 goto err_xdp_reg_mem_model; 2207 2208 virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); 2209 virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); 2210 2211 return 0; 2212 2213 err_xdp_reg_mem_model: 2214 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 2215 return err; 2216 } 2217 2218 static int virtnet_open(struct net_device *dev) 2219 { 2220 struct virtnet_info *vi = netdev_priv(dev); 2221 int i, err; 2222 2223 enable_delayed_refill(vi); 2224 2225 for (i = 0; i < vi->max_queue_pairs; i++) { 2226 if (i < vi->curr_queue_pairs) 2227 /* Make sure we have some buffers: if oom use wq. 
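 * try_fill_recv() may sleep here (GFP_KERNEL); if it still fails, the delayed refill work retries later.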
*/ 2228 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 2229 schedule_delayed_work(&vi->refill, 0); 2230 2231 err = virtnet_enable_queue_pair(vi, i); 2232 if (err < 0) 2233 goto err_enable_qp; 2234 } 2235 2236 return 0; 2237 2238 err_enable_qp: 2239 disable_delayed_refill(vi); 2240 cancel_delayed_work_sync(&vi->refill); 2241 2242 for (i--; i >= 0; i--) 2243 virtnet_disable_queue_pair(vi, i); 2244 return err; 2245 } 2246 2247 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 2248 { 2249 struct send_queue *sq = container_of(napi, struct send_queue, napi); 2250 struct virtnet_info *vi = sq->vq->vdev->priv; 2251 unsigned int index = vq2txq(sq->vq); 2252 struct netdev_queue *txq; 2253 int opaque; 2254 bool done; 2255 2256 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 2257 /* We don't need to enable cb for XDP */ 2258 napi_complete_done(napi, 0); 2259 return 0; 2260 } 2261 2262 txq = netdev_get_tx_queue(vi->dev, index); 2263 __netif_tx_lock(txq, raw_smp_processor_id()); 2264 virtqueue_disable_cb(sq->vq); 2265 free_old_xmit_skbs(sq, true); 2266 2267 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) 2268 netif_tx_wake_queue(txq); 2269 2270 opaque = virtqueue_enable_cb_prepare(sq->vq); 2271 2272 done = napi_complete_done(napi, 0); 2273 2274 if (!done) 2275 virtqueue_disable_cb(sq->vq); 2276 2277 __netif_tx_unlock(txq); 2278 2279 if (done) { 2280 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 2281 if (napi_schedule_prep(napi)) { 2282 __netif_tx_lock(txq, raw_smp_processor_id()); 2283 virtqueue_disable_cb(sq->vq); 2284 __netif_tx_unlock(txq); 2285 __napi_schedule(napi); 2286 } 2287 } 2288 } 2289 2290 return 0; 2291 } 2292 2293 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) 2294 { 2295 struct virtio_net_hdr_mrg_rxbuf *hdr; 2296 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 2297 struct virtnet_info *vi = sq->vq->vdev->priv; 2298 int num_sg; 2299 unsigned hdr_len = vi->hdr_len; 2300 bool can_push; 2301 2302 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 2303 2304 can_push = vi->any_header_sg && 2305 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 2306 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 2307 /* Even if we can, don't push here yet as this would skew 2308 * csum_start offset below. */ 2309 if (can_push) 2310 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); 2311 else 2312 hdr = &skb_vnet_common_hdr(skb)->mrg_hdr; 2313 2314 if (virtio_net_hdr_from_skb(skb, &hdr->hdr, 2315 virtio_is_little_endian(vi->vdev), false, 2316 0)) 2317 return -EPROTO; 2318 2319 if (vi->mergeable_rx_bufs) 2320 hdr->num_buffers = 0; 2321 2322 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 2323 if (can_push) { 2324 __skb_push(skb, hdr_len); 2325 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 2326 if (unlikely(num_sg < 0)) 2327 return num_sg; 2328 /* Pull header back to avoid skew in tx bytes calculations. 
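 * The header was pushed only so skb_to_sgvec() could map it together with the linear data; restore skb->len to the real packet length for byte accounting.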
*/ 2329 __skb_pull(skb, hdr_len); 2330 } else { 2331 sg_set_buf(sq->sg, hdr, hdr_len); 2332 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); 2333 if (unlikely(num_sg < 0)) 2334 return num_sg; 2335 num_sg++; 2336 } 2337 return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC); 2338 } 2339 2340 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 2341 { 2342 struct virtnet_info *vi = netdev_priv(dev); 2343 int qnum = skb_get_queue_mapping(skb); 2344 struct send_queue *sq = &vi->sq[qnum]; 2345 int err; 2346 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 2347 bool kick = !netdev_xmit_more(); 2348 bool use_napi = sq->napi.weight; 2349 2350 /* Free up any pending old buffers before queueing new ones. */ 2351 do { 2352 if (use_napi) 2353 virtqueue_disable_cb(sq->vq); 2354 2355 free_old_xmit_skbs(sq, false); 2356 2357 } while (use_napi && kick && 2358 unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 2359 2360 /* timestamp packet in software */ 2361 skb_tx_timestamp(skb); 2362 2363 /* Try to transmit */ 2364 err = xmit_skb(sq, skb); 2365 2366 /* This should not happen! */ 2367 if (unlikely(err)) { 2368 DEV_STATS_INC(dev, tx_fifo_errors); 2369 if (net_ratelimit()) 2370 dev_warn(&dev->dev, 2371 "Unexpected TXQ (%d) queue failure: %d\n", 2372 qnum, err); 2373 DEV_STATS_INC(dev, tx_dropped); 2374 dev_kfree_skb_any(skb); 2375 return NETDEV_TX_OK; 2376 } 2377 2378 /* Don't wait up for transmitted skbs to be freed. */ 2379 if (!use_napi) { 2380 skb_orphan(skb); 2381 nf_reset_ct(skb); 2382 } 2383 2384 check_sq_full_and_disable(vi, dev, sq); 2385 2386 if (kick || netif_xmit_stopped(txq)) { 2387 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 2388 u64_stats_update_begin(&sq->stats.syncp); 2389 u64_stats_inc(&sq->stats.kicks); 2390 u64_stats_update_end(&sq->stats.syncp); 2391 } 2392 } 2393 2394 return NETDEV_TX_OK; 2395 } 2396 2397 static int virtnet_rx_resize(struct virtnet_info *vi, 2398 struct receive_queue *rq, u32 ring_num) 2399 { 2400 bool running = netif_running(vi->dev); 2401 int err, qindex; 2402 2403 qindex = rq - vi->rq; 2404 2405 if (running) 2406 napi_disable(&rq->napi); 2407 2408 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf); 2409 if (err) 2410 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); 2411 2412 if (!try_fill_recv(vi, rq, GFP_KERNEL)) 2413 schedule_delayed_work(&vi->refill, 0); 2414 2415 if (running) 2416 virtnet_napi_enable(rq->vq, &rq->napi); 2417 return err; 2418 } 2419 2420 static int virtnet_tx_resize(struct virtnet_info *vi, 2421 struct send_queue *sq, u32 ring_num) 2422 { 2423 bool running = netif_running(vi->dev); 2424 struct netdev_queue *txq; 2425 int err, qindex; 2426 2427 qindex = sq - vi->sq; 2428 2429 if (running) 2430 virtnet_napi_tx_disable(&sq->napi); 2431 2432 txq = netdev_get_tx_queue(vi->dev, qindex); 2433 2434 /* 1. wait for all in-flight xmit to complete 2435 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue() 2436 */ 2437 __netif_tx_lock_bh(txq); 2438 2439 /* Prevent rx poll from accessing sq. */ 2440 sq->reset = true; 2441 2442 /* Prevent the upper layer from trying to send packets.
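 * netif_stop_subqueue() runs under the tx lock, so it does not race with a start_xmit() holding the same lock.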
*/ 2443 netif_stop_subqueue(vi->dev, qindex); 2444 2445 __netif_tx_unlock_bh(txq); 2446 2447 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf); 2448 if (err) 2449 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 2450 2451 __netif_tx_lock_bh(txq); 2452 sq->reset = false; 2453 netif_tx_wake_queue(txq); 2454 __netif_tx_unlock_bh(txq); 2455 2456 if (running) 2457 virtnet_napi_tx_enable(vi, sq->vq, &sq->napi); 2458 return err; 2459 } 2460 2461 /* 2462 * Send command via the control virtqueue and check status. Commands 2463 * supported by the hypervisor, as indicated by feature bits, should 2464 * never fail unless improperly formatted. 2465 */ 2466 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 2467 struct scatterlist *out) 2468 { 2469 struct scatterlist *sgs[4], hdr, stat; 2470 unsigned out_num = 0, tmp; 2471 int ret; 2472 2473 /* Caller should know better */ 2474 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 2475 2476 vi->ctrl->status = ~0; 2477 vi->ctrl->hdr.class = class; 2478 vi->ctrl->hdr.cmd = cmd; 2479 /* Add header */ 2480 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 2481 sgs[out_num++] = &hdr; 2482 2483 if (out) 2484 sgs[out_num++] = out; 2485 2486 /* Add return status. */ 2487 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 2488 sgs[out_num] = &stat; 2489 2490 BUG_ON(out_num + 1 > ARRAY_SIZE(sgs)); 2491 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC); 2492 if (ret < 0) { 2493 dev_warn(&vi->vdev->dev, 2494 "Failed to add sgs for command vq: %d\n.", ret); 2495 return false; 2496 } 2497 2498 if (unlikely(!virtqueue_kick(vi->cvq))) 2499 return vi->ctrl->status == VIRTIO_NET_OK; 2500 2501 /* Spin for a response, the kick causes an ioport write, trapping 2502 * into the hypervisor, so the request should be handled immediately. 2503 */ 2504 while (!virtqueue_get_buf(vi->cvq, &tmp) && 2505 !virtqueue_is_broken(vi->cvq)) 2506 cpu_relax(); 2507 2508 return vi->ctrl->status == VIRTIO_NET_OK; 2509 } 2510 2511 static int virtnet_set_mac_address(struct net_device *dev, void *p) 2512 { 2513 struct virtnet_info *vi = netdev_priv(dev); 2514 struct virtio_device *vdev = vi->vdev; 2515 int ret; 2516 struct sockaddr *addr; 2517 struct scatterlist sg; 2518 2519 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 2520 return -EOPNOTSUPP; 2521 2522 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 2523 if (!addr) 2524 return -ENOMEM; 2525 2526 ret = eth_prepare_mac_addr_change(dev, addr); 2527 if (ret) 2528 goto out; 2529 2530 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 2531 sg_init_one(&sg, addr->sa_data, dev->addr_len); 2532 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 2533 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 2534 dev_warn(&vdev->dev, 2535 "Failed to set mac address by vq command.\n"); 2536 ret = -EINVAL; 2537 goto out; 2538 } 2539 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 2540 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 2541 unsigned int i; 2542 2543 /* Naturally, this has an atomicity problem. 
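 * Each byte is written with a separate virtio_cwrite8(), so the device may briefly observe a mix of the old and new MAC.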
*/ 2544 for (i = 0; i < dev->addr_len; i++) 2545 virtio_cwrite8(vdev, 2546 offsetof(struct virtio_net_config, mac) + 2547 i, addr->sa_data[i]); 2548 } 2549 2550 eth_commit_mac_addr_change(dev, p); 2551 ret = 0; 2552 2553 out: 2554 kfree(addr); 2555 return ret; 2556 } 2557 2558 static void virtnet_stats(struct net_device *dev, 2559 struct rtnl_link_stats64 *tot) 2560 { 2561 struct virtnet_info *vi = netdev_priv(dev); 2562 unsigned int start; 2563 int i; 2564 2565 for (i = 0; i < vi->max_queue_pairs; i++) { 2566 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 2567 struct receive_queue *rq = &vi->rq[i]; 2568 struct send_queue *sq = &vi->sq[i]; 2569 2570 do { 2571 start = u64_stats_fetch_begin(&sq->stats.syncp); 2572 tpackets = u64_stats_read(&sq->stats.packets); 2573 tbytes = u64_stats_read(&sq->stats.bytes); 2574 terrors = u64_stats_read(&sq->stats.tx_timeouts); 2575 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 2576 2577 do { 2578 start = u64_stats_fetch_begin(&rq->stats.syncp); 2579 rpackets = u64_stats_read(&rq->stats.packets); 2580 rbytes = u64_stats_read(&rq->stats.bytes); 2581 rdrops = u64_stats_read(&rq->stats.drops); 2582 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 2583 2584 tot->rx_packets += rpackets; 2585 tot->tx_packets += tpackets; 2586 tot->rx_bytes += rbytes; 2587 tot->tx_bytes += tbytes; 2588 tot->rx_dropped += rdrops; 2589 tot->tx_errors += terrors; 2590 } 2591 2592 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); 2593 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); 2594 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); 2595 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); 2596 } 2597 2598 static void virtnet_ack_link_announce(struct virtnet_info *vi) 2599 { 2600 rtnl_lock(); 2601 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 2602 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 2603 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 2604 rtnl_unlock(); 2605 } 2606 2607 static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 2608 { 2609 struct scatterlist sg; 2610 struct net_device *dev = vi->dev; 2611 2612 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) 2613 return 0; 2614 2615 vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); 2616 sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq)); 2617 2618 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 2619 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { 2620 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", 2621 queue_pairs); 2622 return -EINVAL; 2623 } else { 2624 vi->curr_queue_pairs = queue_pairs; 2625 /* virtnet_open() will refill when device is going to up. */ 2626 if (dev->flags & IFF_UP) 2627 schedule_delayed_work(&vi->refill, 0); 2628 } 2629 2630 return 0; 2631 } 2632 2633 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 2634 { 2635 int err; 2636 2637 rtnl_lock(); 2638 err = _virtnet_set_queues(vi, queue_pairs); 2639 rtnl_unlock(); 2640 return err; 2641 } 2642 2643 static int virtnet_close(struct net_device *dev) 2644 { 2645 struct virtnet_info *vi = netdev_priv(dev); 2646 int i; 2647 2648 /* Make sure NAPI doesn't schedule refill work */ 2649 disable_delayed_refill(vi); 2650 /* Make sure refill_work doesn't re-enable napi! 
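 * cancel_delayed_work_sync() also waits for a running refill_work() to finish, so nothing re-enables rx NAPI behind our back.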
*/ 2651 cancel_delayed_work_sync(&vi->refill); 2652 2653 for (i = 0; i < vi->max_queue_pairs; i++) 2654 virtnet_disable_queue_pair(vi, i); 2655 2656 return 0; 2657 } 2658 2659 static void virtnet_set_rx_mode(struct net_device *dev) 2660 { 2661 struct virtnet_info *vi = netdev_priv(dev); 2662 struct scatterlist sg[2]; 2663 struct virtio_net_ctrl_mac *mac_data; 2664 struct netdev_hw_addr *ha; 2665 int uc_count; 2666 int mc_count; 2667 void *buf; 2668 int i; 2669 2670 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 2671 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 2672 return; 2673 2674 vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0); 2675 vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0); 2676 2677 sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc)); 2678 2679 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 2680 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 2681 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 2682 vi->ctrl->promisc ? "en" : "dis"); 2683 2684 sg_init_one(sg, &vi->ctrl->allmulti, sizeof(vi->ctrl->allmulti)); 2685 2686 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 2687 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 2688 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 2689 vi->ctrl->allmulti ? "en" : "dis"); 2690 2691 uc_count = netdev_uc_count(dev); 2692 mc_count = netdev_mc_count(dev); 2693 /* MAC filter - use one buffer for both lists */ 2694 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 2695 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 2696 mac_data = buf; 2697 if (!buf) 2698 return; 2699 2700 sg_init_table(sg, 2); 2701 2702 /* Store the unicast list and count in the front of the buffer */ 2703 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 2704 i = 0; 2705 netdev_for_each_uc_addr(ha, dev) 2706 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 2707 2708 sg_set_buf(&sg[0], mac_data, 2709 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 2710 2711 /* multicast list and count fill the end */ 2712 mac_data = (void *)&mac_data->macs[uc_count][0]; 2713 2714 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 2715 i = 0; 2716 netdev_for_each_mc_addr(ha, dev) 2717 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 2718 2719 sg_set_buf(&sg[1], mac_data, 2720 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 2721 2722 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 2723 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 2724 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 2725 2726 kfree(buf); 2727 } 2728 2729 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 2730 __be16 proto, u16 vid) 2731 { 2732 struct virtnet_info *vi = netdev_priv(dev); 2733 struct scatterlist sg; 2734 2735 vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid); 2736 sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); 2737 2738 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 2739 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 2740 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 2741 return 0; 2742 } 2743 2744 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 2745 __be16 proto, u16 vid) 2746 { 2747 struct virtnet_info *vi = netdev_priv(dev); 2748 struct scatterlist sg; 2749 2750 vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid); 2751 sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); 2752 2753 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 2754 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 2755 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); 2756 return 0; 2757 } 2758 2759 static void virtnet_clean_affinity(struct 
virtnet_info *vi) 2760 { 2761 int i; 2762 2763 if (vi->affinity_hint_set) { 2764 for (i = 0; i < vi->max_queue_pairs; i++) { 2765 virtqueue_set_affinity(vi->rq[i].vq, NULL); 2766 virtqueue_set_affinity(vi->sq[i].vq, NULL); 2767 } 2768 2769 vi->affinity_hint_set = false; 2770 } 2771 } 2772 2773 static void virtnet_set_affinity(struct virtnet_info *vi) 2774 { 2775 cpumask_var_t mask; 2776 int stragglers; 2777 int group_size; 2778 int i, j, cpu; 2779 int num_cpu; 2780 int stride; 2781 2782 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 2783 virtnet_clean_affinity(vi); 2784 return; 2785 } 2786 2787 num_cpu = num_online_cpus(); 2788 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); 2789 stragglers = num_cpu >= vi->curr_queue_pairs ? 2790 num_cpu % vi->curr_queue_pairs : 2791 0; 2792 cpu = cpumask_first(cpu_online_mask); 2793 2794 for (i = 0; i < vi->curr_queue_pairs; i++) { 2795 group_size = stride + (i < stragglers ? 1 : 0); 2796 2797 for (j = 0; j < group_size; j++) { 2798 cpumask_set_cpu(cpu, mask); 2799 cpu = cpumask_next_wrap(cpu, cpu_online_mask, 2800 nr_cpu_ids, false); 2801 } 2802 virtqueue_set_affinity(vi->rq[i].vq, mask); 2803 virtqueue_set_affinity(vi->sq[i].vq, mask); 2804 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 2805 cpumask_clear(mask); 2806 } 2807 2808 vi->affinity_hint_set = true; 2809 free_cpumask_var(mask); 2810 } 2811 2812 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 2813 { 2814 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 2815 node); 2816 virtnet_set_affinity(vi); 2817 return 0; 2818 } 2819 2820 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 2821 { 2822 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 2823 node_dead); 2824 virtnet_set_affinity(vi); 2825 return 0; 2826 } 2827 2828 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 2829 { 2830 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 2831 node); 2832 2833 virtnet_clean_affinity(vi); 2834 return 0; 2835 } 2836 2837 static enum cpuhp_state virtionet_online; 2838 2839 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 2840 { 2841 int ret; 2842 2843 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 2844 if (ret) 2845 return ret; 2846 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 2847 &vi->node_dead); 2848 if (!ret) 2849 return ret; 2850 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 2851 return ret; 2852 } 2853 2854 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 2855 { 2856 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 2857 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 2858 &vi->node_dead); 2859 } 2860 2861 static void virtnet_get_ringparam(struct net_device *dev, 2862 struct ethtool_ringparam *ring, 2863 struct kernel_ethtool_ringparam *kernel_ring, 2864 struct netlink_ext_ack *extack) 2865 { 2866 struct virtnet_info *vi = netdev_priv(dev); 2867 2868 ring->rx_max_pending = vi->rq[0].vq->num_max; 2869 ring->tx_max_pending = vi->sq[0].vq->num_max; 2870 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 2871 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 2872 } 2873 2874 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 2875 u16 vqn, u32 max_usecs, u32 max_packets); 2876 2877 static int virtnet_set_ringparam(struct net_device *dev, 2878 struct ethtool_ringparam *ring, 2879 struct kernel_ethtool_ringparam 
*kernel_ring, 2880 struct netlink_ext_ack *extack) 2881 { 2882 struct virtnet_info *vi = netdev_priv(dev); 2883 u32 rx_pending, tx_pending; 2884 struct receive_queue *rq; 2885 struct send_queue *sq; 2886 int i, err; 2887 2888 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 2889 return -EINVAL; 2890 2891 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 2892 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 2893 2894 if (ring->rx_pending == rx_pending && 2895 ring->tx_pending == tx_pending) 2896 return 0; 2897 2898 if (ring->rx_pending > vi->rq[0].vq->num_max) 2899 return -EINVAL; 2900 2901 if (ring->tx_pending > vi->sq[0].vq->num_max) 2902 return -EINVAL; 2903 2904 for (i = 0; i < vi->max_queue_pairs; i++) { 2905 rq = vi->rq + i; 2906 sq = vi->sq + i; 2907 2908 if (ring->tx_pending != tx_pending) { 2909 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 2910 if (err) 2911 return err; 2912 2913 /* Upon disabling and re-enabling a transmit virtqueue, the device must 2914 * set the coalescing parameters of the virtqueue to those configured 2915 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver 2916 * did not set any TX coalescing parameters, to 0. 2917 */ 2918 err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(i), 2919 vi->intr_coal_tx.max_usecs, 2920 vi->intr_coal_tx.max_packets); 2921 if (err) 2922 return err; 2923 2924 vi->sq[i].intr_coal.max_usecs = vi->intr_coal_tx.max_usecs; 2925 vi->sq[i].intr_coal.max_packets = vi->intr_coal_tx.max_packets; 2926 } 2927 2928 if (ring->rx_pending != rx_pending) { 2929 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 2930 if (err) 2931 return err; 2932 2933 /* The reason is same as the transmit virtqueue reset */ 2934 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(i), 2935 vi->intr_coal_rx.max_usecs, 2936 vi->intr_coal_rx.max_packets); 2937 if (err) 2938 return err; 2939 2940 vi->rq[i].intr_coal.max_usecs = vi->intr_coal_rx.max_usecs; 2941 vi->rq[i].intr_coal.max_packets = vi->intr_coal_rx.max_packets; 2942 } 2943 } 2944 2945 return 0; 2946 } 2947 2948 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 2949 { 2950 struct net_device *dev = vi->dev; 2951 struct scatterlist sgs[4]; 2952 unsigned int sg_buf_size; 2953 2954 /* prepare sgs */ 2955 sg_init_table(sgs, 4); 2956 2957 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table); 2958 sg_set_buf(&sgs[0], &vi->ctrl->rss, sg_buf_size); 2959 2960 sg_buf_size = sizeof(uint16_t) * (vi->ctrl->rss.indirection_table_mask + 1); 2961 sg_set_buf(&sgs[1], vi->ctrl->rss.indirection_table, sg_buf_size); 2962 2963 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) 2964 - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); 2965 sg_set_buf(&sgs[2], &vi->ctrl->rss.max_tx_vq, sg_buf_size); 2966 2967 sg_buf_size = vi->rss_key_size; 2968 sg_set_buf(&sgs[3], vi->ctrl->rss.key, sg_buf_size); 2969 2970 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 2971 vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG 2972 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) { 2973 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 2974 return false; 2975 } 2976 return true; 2977 } 2978 2979 static void virtnet_init_default_rss(struct virtnet_info *vi) 2980 { 2981 u32 indir_val = 0; 2982 int i = 0; 2983 2984 vi->ctrl->rss.hash_types = vi->rss_hash_types_supported; 2985 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 2986 vi->ctrl->rss.indirection_table_mask = vi->rss_indir_table_size 2987 ? 
vi->rss_indir_table_size - 1 : 0; 2988 vi->ctrl->rss.unclassified_queue = 0; 2989 2990 for (; i < vi->rss_indir_table_size; ++i) { 2991 indir_val = ethtool_rxfh_indir_default(i, vi->curr_queue_pairs); 2992 vi->ctrl->rss.indirection_table[i] = indir_val; 2993 } 2994 2995 vi->ctrl->rss.max_tx_vq = vi->has_rss ? vi->curr_queue_pairs : 0; 2996 vi->ctrl->rss.hash_key_length = vi->rss_key_size; 2997 2998 netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size); 2999 } 3000 3001 static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info) 3002 { 3003 info->data = 0; 3004 switch (info->flow_type) { 3005 case TCP_V4_FLOW: 3006 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 3007 info->data = RXH_IP_SRC | RXH_IP_DST | 3008 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3009 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 3010 info->data = RXH_IP_SRC | RXH_IP_DST; 3011 } 3012 break; 3013 case TCP_V6_FLOW: 3014 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 3015 info->data = RXH_IP_SRC | RXH_IP_DST | 3016 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3017 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 3018 info->data = RXH_IP_SRC | RXH_IP_DST; 3019 } 3020 break; 3021 case UDP_V4_FLOW: 3022 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 3023 info->data = RXH_IP_SRC | RXH_IP_DST | 3024 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3025 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 3026 info->data = RXH_IP_SRC | RXH_IP_DST; 3027 } 3028 break; 3029 case UDP_V6_FLOW: 3030 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 3031 info->data = RXH_IP_SRC | RXH_IP_DST | 3032 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3033 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 3034 info->data = RXH_IP_SRC | RXH_IP_DST; 3035 } 3036 break; 3037 case IPV4_FLOW: 3038 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 3039 info->data = RXH_IP_SRC | RXH_IP_DST; 3040 3041 break; 3042 case IPV6_FLOW: 3043 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 3044 info->data = RXH_IP_SRC | RXH_IP_DST; 3045 3046 break; 3047 default: 3048 info->data = 0; 3049 break; 3050 } 3051 } 3052 3053 static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info) 3054 { 3055 u32 new_hashtypes = vi->rss_hash_types_saved; 3056 bool is_disable = info->data & RXH_DISCARD; 3057 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 3058 3059 /* supports only 'sd', 'sdfn' and 'r' */ 3060 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 3061 return false; 3062 3063 switch (info->flow_type) { 3064 case TCP_V4_FLOW: 3065 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 3066 if (!is_disable) 3067 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 3068 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 3069 break; 3070 case UDP_V4_FLOW: 3071 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 3072 if (!is_disable) 3073 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 3074 | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 3075 break; 3076 case IPV4_FLOW: 3077 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 3078 if (!is_disable) 3079 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 3080 break; 3081 case TCP_V6_FLOW: 3082 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 3083 if (!is_disable) 3084 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 3085 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 3086 break; 3087 case UDP_V6_FLOW: 3088 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 3089 if (!is_disable) 3090 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 3091 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 3092 break; 3093 case IPV6_FLOW: 3094 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 3095 if (!is_disable) 3096 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 3097 break; 3098 default: 3099 /* unsupported flow */ 3100 return false; 3101 } 3102 3103 /* if unsupported hashtype was set */ 3104 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 3105 return false; 3106 3107 if (new_hashtypes != vi->rss_hash_types_saved) { 3108 vi->rss_hash_types_saved = new_hashtypes; 3109 vi->ctrl->rss.hash_types = vi->rss_hash_types_saved; 3110 if (vi->dev->features & NETIF_F_RXHASH) 3111 return virtnet_commit_rss_command(vi); 3112 } 3113 3114 return true; 3115 } 3116 3117 static void virtnet_get_drvinfo(struct net_device *dev, 3118 struct ethtool_drvinfo *info) 3119 { 3120 struct virtnet_info *vi = netdev_priv(dev); 3121 struct virtio_device *vdev = vi->vdev; 3122 3123 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 3124 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 3125 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 3126 3127 } 3128 3129 /* TODO: Eliminate OOO packets during switching */ 3130 static int virtnet_set_channels(struct net_device *dev, 3131 struct ethtool_channels *channels) 3132 { 3133 struct virtnet_info *vi = netdev_priv(dev); 3134 u16 queue_pairs = channels->combined_count; 3135 int err; 3136 3137 /* We don't support separate rx/tx channels. 3138 * We don't allow setting 'other' channels. 3139 */ 3140 if (channels->rx_count || channels->tx_count || channels->other_count) 3141 return -EINVAL; 3142 3143 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 3144 return -EINVAL; 3145 3146 /* For now we don't support modifying channels while XDP is loaded 3147 * also when XDP is loaded all RX queues have XDP programs so we only 3148 * need to check a single RX queue. 
3149 */ 3150 if (vi->rq[0].xdp_prog) 3151 return -EINVAL; 3152 3153 cpus_read_lock(); 3154 err = _virtnet_set_queues(vi, queue_pairs); 3155 if (err) { 3156 cpus_read_unlock(); 3157 goto err; 3158 } 3159 virtnet_set_affinity(vi); 3160 cpus_read_unlock(); 3161 3162 netif_set_real_num_tx_queues(dev, queue_pairs); 3163 netif_set_real_num_rx_queues(dev, queue_pairs); 3164 err: 3165 return err; 3166 } 3167 3168 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 3169 { 3170 struct virtnet_info *vi = netdev_priv(dev); 3171 unsigned int i, j; 3172 u8 *p = data; 3173 3174 switch (stringset) { 3175 case ETH_SS_STATS: 3176 for (i = 0; i < vi->curr_queue_pairs; i++) { 3177 for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) 3178 ethtool_sprintf(&p, "rx_queue_%u_%s", i, 3179 virtnet_rq_stats_desc[j].desc); 3180 } 3181 3182 for (i = 0; i < vi->curr_queue_pairs; i++) { 3183 for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) 3184 ethtool_sprintf(&p, "tx_queue_%u_%s", i, 3185 virtnet_sq_stats_desc[j].desc); 3186 } 3187 break; 3188 } 3189 } 3190 3191 static int virtnet_get_sset_count(struct net_device *dev, int sset) 3192 { 3193 struct virtnet_info *vi = netdev_priv(dev); 3194 3195 switch (sset) { 3196 case ETH_SS_STATS: 3197 return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN + 3198 VIRTNET_SQ_STATS_LEN); 3199 default: 3200 return -EOPNOTSUPP; 3201 } 3202 } 3203 3204 static void virtnet_get_ethtool_stats(struct net_device *dev, 3205 struct ethtool_stats *stats, u64 *data) 3206 { 3207 struct virtnet_info *vi = netdev_priv(dev); 3208 unsigned int idx = 0, start, i, j; 3209 const u8 *stats_base; 3210 const u64_stats_t *p; 3211 size_t offset; 3212 3213 for (i = 0; i < vi->curr_queue_pairs; i++) { 3214 struct receive_queue *rq = &vi->rq[i]; 3215 3216 stats_base = (const u8 *)&rq->stats; 3217 do { 3218 start = u64_stats_fetch_begin(&rq->stats.syncp); 3219 for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) { 3220 offset = virtnet_rq_stats_desc[j].offset; 3221 p = (const u64_stats_t *)(stats_base + offset); 3222 data[idx + j] = u64_stats_read(p); 3223 } 3224 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 3225 idx += VIRTNET_RQ_STATS_LEN; 3226 } 3227 3228 for (i = 0; i < vi->curr_queue_pairs; i++) { 3229 struct send_queue *sq = &vi->sq[i]; 3230 3231 stats_base = (const u8 *)&sq->stats; 3232 do { 3233 start = u64_stats_fetch_begin(&sq->stats.syncp); 3234 for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) { 3235 offset = virtnet_sq_stats_desc[j].offset; 3236 p = (const u64_stats_t *)(stats_base + offset); 3237 data[idx + j] = u64_stats_read(p); 3238 } 3239 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 3240 idx += VIRTNET_SQ_STATS_LEN; 3241 } 3242 } 3243 3244 static void virtnet_get_channels(struct net_device *dev, 3245 struct ethtool_channels *channels) 3246 { 3247 struct virtnet_info *vi = netdev_priv(dev); 3248 3249 channels->combined_count = vi->curr_queue_pairs; 3250 channels->max_combined = vi->max_queue_pairs; 3251 channels->max_other = 0; 3252 channels->rx_count = 0; 3253 channels->tx_count = 0; 3254 channels->other_count = 0; 3255 } 3256 3257 static int virtnet_set_link_ksettings(struct net_device *dev, 3258 const struct ethtool_link_ksettings *cmd) 3259 { 3260 struct virtnet_info *vi = netdev_priv(dev); 3261 3262 return ethtool_virtdev_set_link_ksettings(dev, cmd, 3263 &vi->speed, &vi->duplex); 3264 } 3265 3266 static int virtnet_get_link_ksettings(struct net_device *dev, 3267 struct ethtool_link_ksettings *cmd) 3268 { 3269 struct virtnet_info *vi = netdev_priv(dev); 3270 3271 
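/* Speed and duplex are cached by virtnet_update_settings() when VIRTIO_NET_F_SPEED_DUPLEX is negotiated; otherwise both remain UNKNOWN. */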
cmd->base.speed = vi->speed; 3272 cmd->base.duplex = vi->duplex; 3273 cmd->base.port = PORT_OTHER; 3274 3275 return 0; 3276 } 3277 3278 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, 3279 struct ethtool_coalesce *ec) 3280 { 3281 struct scatterlist sgs_tx, sgs_rx; 3282 int i; 3283 3284 vi->ctrl->coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 3285 vi->ctrl->coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 3286 sg_init_one(&sgs_tx, &vi->ctrl->coal_tx, sizeof(vi->ctrl->coal_tx)); 3287 3288 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 3289 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 3290 &sgs_tx)) 3291 return -EINVAL; 3292 3293 /* Save parameters */ 3294 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; 3295 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; 3296 for (i = 0; i < vi->max_queue_pairs; i++) { 3297 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; 3298 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; 3299 } 3300 3301 vi->ctrl->coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); 3302 vi->ctrl->coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); 3303 sg_init_one(&sgs_rx, &vi->ctrl->coal_rx, sizeof(vi->ctrl->coal_rx)); 3304 3305 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 3306 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, 3307 &sgs_rx)) 3308 return -EINVAL; 3309 3310 /* Save parameters */ 3311 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; 3312 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; 3313 for (i = 0; i < vi->max_queue_pairs; i++) { 3314 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; 3315 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; 3316 } 3317 3318 return 0; 3319 } 3320 3321 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3322 u16 vqn, u32 max_usecs, u32 max_packets) 3323 { 3324 struct scatterlist sgs; 3325 3326 vi->ctrl->coal_vq.vqn = cpu_to_le16(vqn); 3327 vi->ctrl->coal_vq.coal.max_usecs = cpu_to_le32(max_usecs); 3328 vi->ctrl->coal_vq.coal.max_packets = cpu_to_le32(max_packets); 3329 sg_init_one(&sgs, &vi->ctrl->coal_vq, sizeof(vi->ctrl->coal_vq)); 3330 3331 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 3332 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, 3333 &sgs)) 3334 return -EINVAL; 3335 3336 return 0; 3337 } 3338 3339 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, 3340 struct ethtool_coalesce *ec, 3341 u16 queue) 3342 { 3343 int err; 3344 3345 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), 3346 ec->rx_coalesce_usecs, 3347 ec->rx_max_coalesced_frames); 3348 if (err) 3349 return err; 3350 3351 vi->rq[queue].intr_coal.max_usecs = ec->rx_coalesce_usecs; 3352 vi->rq[queue].intr_coal.max_packets = ec->rx_max_coalesced_frames; 3353 3354 err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), 3355 ec->tx_coalesce_usecs, 3356 ec->tx_max_coalesced_frames); 3357 if (err) 3358 return err; 3359 3360 vi->sq[queue].intr_coal.max_usecs = ec->tx_coalesce_usecs; 3361 vi->sq[queue].intr_coal.max_packets = ec->tx_max_coalesced_frames; 3362 3363 return 0; 3364 } 3365 3366 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 3367 { 3368 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 3369 * feature is negotiated. 
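 * Without it, rx_max_coalesced_frames must stay at its default of 1, and tx_max_coalesced_frames (0 or 1) merely selects whether tx NAPI is used.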
3370 */ 3371 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 3372 return -EOPNOTSUPP; 3373 3374 if (ec->tx_max_coalesced_frames > 1 || 3375 ec->rx_max_coalesced_frames != 1) 3376 return -EINVAL; 3377 3378 return 0; 3379 } 3380 3381 static int virtnet_should_update_vq_weight(int dev_flags, int weight, 3382 int vq_weight, bool *should_update) 3383 { 3384 if (weight ^ vq_weight) { 3385 if (dev_flags & IFF_UP) 3386 return -EBUSY; 3387 *should_update = true; 3388 } 3389 3390 return 0; 3391 } 3392 3393 static int virtnet_set_coalesce(struct net_device *dev, 3394 struct ethtool_coalesce *ec, 3395 struct kernel_ethtool_coalesce *kernel_coal, 3396 struct netlink_ext_ack *extack) 3397 { 3398 struct virtnet_info *vi = netdev_priv(dev); 3399 int ret, queue_number, napi_weight; 3400 bool update_napi = false; 3401 3402 /* Can't change NAPI weight if the link is up */ 3403 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 3404 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { 3405 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 3406 vi->sq[queue_number].napi.weight, 3407 &update_napi); 3408 if (ret) 3409 return ret; 3410 3411 if (update_napi) { 3412 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be 3413 * updated for the sake of simplicity, which might not be necessary 3414 */ 3415 break; 3416 } 3417 } 3418 3419 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 3420 ret = virtnet_send_notf_coal_cmds(vi, ec); 3421 else 3422 ret = virtnet_coal_params_supported(ec); 3423 3424 if (ret) 3425 return ret; 3426 3427 if (update_napi) { 3428 for (; queue_number < vi->max_queue_pairs; queue_number++) 3429 vi->sq[queue_number].napi.weight = napi_weight; 3430 } 3431 3432 return ret; 3433 } 3434 3435 static int virtnet_get_coalesce(struct net_device *dev, 3436 struct ethtool_coalesce *ec, 3437 struct kernel_ethtool_coalesce *kernel_coal, 3438 struct netlink_ext_ack *extack) 3439 { 3440 struct virtnet_info *vi = netdev_priv(dev); 3441 3442 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 3443 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; 3444 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; 3445 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; 3446 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; 3447 } else { 3448 ec->rx_max_coalesced_frames = 1; 3449 3450 if (vi->sq[0].napi.weight) 3451 ec->tx_max_coalesced_frames = 1; 3452 } 3453 3454 return 0; 3455 } 3456 3457 static int virtnet_set_per_queue_coalesce(struct net_device *dev, 3458 u32 queue, 3459 struct ethtool_coalesce *ec) 3460 { 3461 struct virtnet_info *vi = netdev_priv(dev); 3462 int ret, napi_weight; 3463 bool update_napi = false; 3464 3465 if (queue >= vi->max_queue_pairs) 3466 return -EINVAL; 3467 3468 /* Can't change NAPI weight if the link is up */ 3469 napi_weight = ec->tx_max_coalesced_frames ? 
NAPI_POLL_WEIGHT : 0; 3470 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 3471 vi->sq[queue].napi.weight, 3472 &update_napi); 3473 if (ret) 3474 return ret; 3475 3476 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3477 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); 3478 else 3479 ret = virtnet_coal_params_supported(ec); 3480 3481 if (ret) 3482 return ret; 3483 3484 if (update_napi) 3485 vi->sq[queue].napi.weight = napi_weight; 3486 3487 return 0; 3488 } 3489 3490 static int virtnet_get_per_queue_coalesce(struct net_device *dev, 3491 u32 queue, 3492 struct ethtool_coalesce *ec) 3493 { 3494 struct virtnet_info *vi = netdev_priv(dev); 3495 3496 if (queue >= vi->max_queue_pairs) 3497 return -EINVAL; 3498 3499 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 3500 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; 3501 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; 3502 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; 3503 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; 3504 } else { 3505 ec->rx_max_coalesced_frames = 1; 3506 3507 if (vi->sq[queue].napi.weight) 3508 ec->tx_max_coalesced_frames = 1; 3509 } 3510 3511 return 0; 3512 } 3513 3514 static void virtnet_init_settings(struct net_device *dev) 3515 { 3516 struct virtnet_info *vi = netdev_priv(dev); 3517 3518 vi->speed = SPEED_UNKNOWN; 3519 vi->duplex = DUPLEX_UNKNOWN; 3520 } 3521 3522 static void virtnet_update_settings(struct virtnet_info *vi) 3523 { 3524 u32 speed; 3525 u8 duplex; 3526 3527 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 3528 return; 3529 3530 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 3531 3532 if (ethtool_validate_speed(speed)) 3533 vi->speed = speed; 3534 3535 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); 3536 3537 if (ethtool_validate_duplex(duplex)) 3538 vi->duplex = duplex; 3539 } 3540 3541 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 3542 { 3543 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 3544 } 3545 3546 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 3547 { 3548 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 3549 } 3550 3551 static int virtnet_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfunc) 3552 { 3553 struct virtnet_info *vi = netdev_priv(dev); 3554 int i; 3555 3556 if (indir) { 3557 for (i = 0; i < vi->rss_indir_table_size; ++i) 3558 indir[i] = vi->ctrl->rss.indirection_table[i]; 3559 } 3560 3561 if (key) 3562 memcpy(key, vi->ctrl->rss.key, vi->rss_key_size); 3563 3564 if (hfunc) 3565 *hfunc = ETH_RSS_HASH_TOP; 3566 3567 return 0; 3568 } 3569 3570 static int virtnet_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) 3571 { 3572 struct virtnet_info *vi = netdev_priv(dev); 3573 bool update = false; 3574 int i; 3575 3576 if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) 3577 return -EOPNOTSUPP; 3578 3579 if (indir) { 3580 if (!vi->has_rss) 3581 return -EOPNOTSUPP; 3582 3583 for (i = 0; i < vi->rss_indir_table_size; ++i) 3584 vi->ctrl->rss.indirection_table[i] = indir[i]; 3585 update = true; 3586 } 3587 if (key) { 3588 /* If either _F_HASH_REPORT or _F_RSS are negotiated, the 3589 * device provides hash calculation capabilities, that is, 3590 * hash_key is configured. 
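 * has_rss_hash_report alone is enough to update the key; the indirection table above additionally requires has_rss.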
3591 */ 3592 if (!vi->has_rss && !vi->has_rss_hash_report) 3593 return -EOPNOTSUPP; 3594 3595 memcpy(vi->ctrl->rss.key, key, vi->rss_key_size); 3596 update = true; 3597 } 3598 3599 if (update) 3600 virtnet_commit_rss_command(vi); 3601 3602 return 0; 3603 } 3604 3605 static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) 3606 { 3607 struct virtnet_info *vi = netdev_priv(dev); 3608 int rc = 0; 3609 3610 switch (info->cmd) { 3611 case ETHTOOL_GRXRINGS: 3612 info->data = vi->curr_queue_pairs; 3613 break; 3614 case ETHTOOL_GRXFH: 3615 virtnet_get_hashflow(vi, info); 3616 break; 3617 default: 3618 rc = -EOPNOTSUPP; 3619 } 3620 3621 return rc; 3622 } 3623 3624 static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) 3625 { 3626 struct virtnet_info *vi = netdev_priv(dev); 3627 int rc = 0; 3628 3629 switch (info->cmd) { 3630 case ETHTOOL_SRXFH: 3631 if (!virtnet_set_hashflow(vi, info)) 3632 rc = -EINVAL; 3633 3634 break; 3635 default: 3636 rc = -EOPNOTSUPP; 3637 } 3638 3639 return rc; 3640 } 3641 3642 static const struct ethtool_ops virtnet_ethtool_ops = { 3643 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 3644 ETHTOOL_COALESCE_USECS, 3645 .get_drvinfo = virtnet_get_drvinfo, 3646 .get_link = ethtool_op_get_link, 3647 .get_ringparam = virtnet_get_ringparam, 3648 .set_ringparam = virtnet_set_ringparam, 3649 .get_strings = virtnet_get_strings, 3650 .get_sset_count = virtnet_get_sset_count, 3651 .get_ethtool_stats = virtnet_get_ethtool_stats, 3652 .set_channels = virtnet_set_channels, 3653 .get_channels = virtnet_get_channels, 3654 .get_ts_info = ethtool_op_get_ts_info, 3655 .get_link_ksettings = virtnet_get_link_ksettings, 3656 .set_link_ksettings = virtnet_set_link_ksettings, 3657 .set_coalesce = virtnet_set_coalesce, 3658 .get_coalesce = virtnet_get_coalesce, 3659 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, 3660 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, 3661 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 3662 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 3663 .get_rxfh = virtnet_get_rxfh, 3664 .set_rxfh = virtnet_set_rxfh, 3665 .get_rxnfc = virtnet_get_rxnfc, 3666 .set_rxnfc = virtnet_set_rxnfc, 3667 }; 3668 3669 static void virtnet_freeze_down(struct virtio_device *vdev) 3670 { 3671 struct virtnet_info *vi = vdev->priv; 3672 3673 /* Make sure no work handler is accessing the device */ 3674 flush_work(&vi->config_work); 3675 3676 netif_tx_lock_bh(vi->dev); 3677 netif_device_detach(vi->dev); 3678 netif_tx_unlock_bh(vi->dev); 3679 if (netif_running(vi->dev)) 3680 virtnet_close(vi->dev); 3681 } 3682 3683 static int init_vqs(struct virtnet_info *vi); 3684 3685 static int virtnet_restore_up(struct virtio_device *vdev) 3686 { 3687 struct virtnet_info *vi = vdev->priv; 3688 int err; 3689 3690 err = init_vqs(vi); 3691 if (err) 3692 return err; 3693 3694 virtio_device_ready(vdev); 3695 3696 enable_delayed_refill(vi); 3697 3698 if (netif_running(vi->dev)) { 3699 err = virtnet_open(vi->dev); 3700 if (err) 3701 return err; 3702 } 3703 3704 netif_tx_lock_bh(vi->dev); 3705 netif_device_attach(vi->dev); 3706 netif_tx_unlock_bh(vi->dev); 3707 return err; 3708 } 3709 3710 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) 3711 { 3712 struct scatterlist sg; 3713 vi->ctrl->offloads = cpu_to_virtio64(vi->vdev, offloads); 3714 3715 sg_init_one(&sg, &vi->ctrl->offloads, sizeof(vi->ctrl->offloads)); 3716 3717 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 3718 
VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 3719 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); 3720 return -EINVAL; 3721 } 3722 3723 return 0; 3724 } 3725 3726 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 3727 { 3728 u64 offloads = 0; 3729 3730 if (!vi->guest_offloads) 3731 return 0; 3732 3733 return virtnet_set_guest_offloads(vi, offloads); 3734 } 3735 3736 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 3737 { 3738 u64 offloads = vi->guest_offloads; 3739 3740 if (!vi->guest_offloads) 3741 return 0; 3742 3743 return virtnet_set_guest_offloads(vi, offloads); 3744 } 3745 3746 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 3747 struct netlink_ext_ack *extack) 3748 { 3749 unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + 3750 sizeof(struct skb_shared_info)); 3751 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 3752 struct virtnet_info *vi = netdev_priv(dev); 3753 struct bpf_prog *old_prog; 3754 u16 xdp_qp = 0, curr_qp; 3755 int i, err; 3756 3757 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 3758 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 3759 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 3760 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 3761 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 3762 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 3763 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 3764 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 3765 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); 3766 return -EOPNOTSUPP; 3767 } 3768 3769 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 3770 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); 3771 return -EINVAL; 3772 } 3773 3774 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 3775 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 3776 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 3777 return -EINVAL; 3778 } 3779 3780 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 3781 if (prog) 3782 xdp_qp = nr_cpu_ids; 3783 3784 /* XDP requires extra queues for XDP_TX */ 3785 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 3786 netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 3787 curr_qp + xdp_qp, vi->max_queue_pairs); 3788 xdp_qp = 0; 3789 } 3790 3791 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 3792 if (!prog && !old_prog) 3793 return 0; 3794 3795 if (prog) 3796 bpf_prog_add(prog, vi->max_queue_pairs - 1); 3797 3798 /* Make sure NAPI is not using any XDP TX queues for RX. 
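 * Both rx and tx NAPI are disabled across the program swap below and re-enabled once the new program and queue count are in place.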
*/ 3799 if (netif_running(dev)) { 3800 for (i = 0; i < vi->max_queue_pairs; i++) { 3801 napi_disable(&vi->rq[i].napi); 3802 virtnet_napi_tx_disable(&vi->sq[i].napi); 3803 } 3804 } 3805 3806 if (!prog) { 3807 for (i = 0; i < vi->max_queue_pairs; i++) { 3808 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 3809 if (i == 0) 3810 virtnet_restore_guest_offloads(vi); 3811 } 3812 synchronize_net(); 3813 } 3814 3815 err = _virtnet_set_queues(vi, curr_qp + xdp_qp); 3816 if (err) 3817 goto err; 3818 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 3819 vi->xdp_queue_pairs = xdp_qp; 3820 3821 if (prog) { 3822 vi->xdp_enabled = true; 3823 for (i = 0; i < vi->max_queue_pairs; i++) { 3824 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 3825 if (i == 0 && !old_prog) 3826 virtnet_clear_guest_offloads(vi); 3827 } 3828 if (!old_prog) 3829 xdp_features_set_redirect_target(dev, true); 3830 } else { 3831 xdp_features_clear_redirect_target(dev); 3832 vi->xdp_enabled = false; 3833 } 3834 3835 for (i = 0; i < vi->max_queue_pairs; i++) { 3836 if (old_prog) 3837 bpf_prog_put(old_prog); 3838 if (netif_running(dev)) { 3839 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 3840 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 3841 &vi->sq[i].napi); 3842 } 3843 } 3844 3845 return 0; 3846 3847 err: 3848 if (!prog) { 3849 virtnet_clear_guest_offloads(vi); 3850 for (i = 0; i < vi->max_queue_pairs; i++) 3851 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 3852 } 3853 3854 if (netif_running(dev)) { 3855 for (i = 0; i < vi->max_queue_pairs; i++) { 3856 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 3857 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 3858 &vi->sq[i].napi); 3859 } 3860 } 3861 if (prog) 3862 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 3863 return err; 3864 } 3865 3866 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 3867 { 3868 switch (xdp->command) { 3869 case XDP_SETUP_PROG: 3870 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 3871 default: 3872 return -EINVAL; 3873 } 3874 } 3875 3876 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 3877 size_t len) 3878 { 3879 struct virtnet_info *vi = netdev_priv(dev); 3880 int ret; 3881 3882 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3883 return -EOPNOTSUPP; 3884 3885 ret = snprintf(buf, len, "sby"); 3886 if (ret >= len) 3887 return -EOPNOTSUPP; 3888 3889 return 0; 3890 } 3891 3892 static int virtnet_set_features(struct net_device *dev, 3893 netdev_features_t features) 3894 { 3895 struct virtnet_info *vi = netdev_priv(dev); 3896 u64 offloads; 3897 int err; 3898 3899 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 3900 if (vi->xdp_enabled) 3901 return -EBUSY; 3902 3903 if (features & NETIF_F_GRO_HW) 3904 offloads = vi->guest_offloads_capable; 3905 else 3906 offloads = vi->guest_offloads_capable & 3907 ~GUEST_OFFLOAD_GRO_HW_MASK; 3908 3909 err = virtnet_set_guest_offloads(vi, offloads); 3910 if (err) 3911 return err; 3912 vi->guest_offloads = offloads; 3913 } 3914 3915 if ((dev->features ^ features) & NETIF_F_RXHASH) { 3916 if (features & NETIF_F_RXHASH) 3917 vi->ctrl->rss.hash_types = vi->rss_hash_types_saved; 3918 else 3919 vi->ctrl->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; 3920 3921 if (!virtnet_commit_rss_command(vi)) 3922 return -EINVAL; 3923 } 3924 3925 return 0; 3926 } 3927 3928 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 3929 { 3930 struct virtnet_info *priv = netdev_priv(dev); 3931 struct send_queue *sq = &priv->sq[txqueue]; 3932 struct netdev_queue *txq = 
netdev_get_tx_queue(dev, txqueue); 3933 3934 u64_stats_update_begin(&sq->stats.syncp); 3935 u64_stats_inc(&sq->stats.tx_timeouts); 3936 u64_stats_update_end(&sq->stats.syncp); 3937 3938 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 3939 txqueue, sq->name, sq->vq->index, sq->vq->name, 3940 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 3941 } 3942 3943 static const struct net_device_ops virtnet_netdev = { 3944 .ndo_open = virtnet_open, 3945 .ndo_stop = virtnet_close, 3946 .ndo_start_xmit = start_xmit, 3947 .ndo_validate_addr = eth_validate_addr, 3948 .ndo_set_mac_address = virtnet_set_mac_address, 3949 .ndo_set_rx_mode = virtnet_set_rx_mode, 3950 .ndo_get_stats64 = virtnet_stats, 3951 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 3952 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 3953 .ndo_bpf = virtnet_xdp, 3954 .ndo_xdp_xmit = virtnet_xdp_xmit, 3955 .ndo_features_check = passthru_features_check, 3956 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 3957 .ndo_set_features = virtnet_set_features, 3958 .ndo_tx_timeout = virtnet_tx_timeout, 3959 }; 3960 3961 static void virtnet_config_changed_work(struct work_struct *work) 3962 { 3963 struct virtnet_info *vi = 3964 container_of(work, struct virtnet_info, config_work); 3965 u16 v; 3966 3967 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 3968 struct virtio_net_config, status, &v) < 0) 3969 return; 3970 3971 if (v & VIRTIO_NET_S_ANNOUNCE) { 3972 netdev_notify_peers(vi->dev); 3973 virtnet_ack_link_announce(vi); 3974 } 3975 3976 /* Ignore unknown (future) status bits */ 3977 v &= VIRTIO_NET_S_LINK_UP; 3978 3979 if (vi->status == v) 3980 return; 3981 3982 vi->status = v; 3983 3984 if (vi->status & VIRTIO_NET_S_LINK_UP) { 3985 virtnet_update_settings(vi); 3986 netif_carrier_on(vi->dev); 3987 netif_tx_wake_all_queues(vi->dev); 3988 } else { 3989 netif_carrier_off(vi->dev); 3990 netif_tx_stop_all_queues(vi->dev); 3991 } 3992 } 3993 3994 static void virtnet_config_changed(struct virtio_device *vdev) 3995 { 3996 struct virtnet_info *vi = vdev->priv; 3997 3998 schedule_work(&vi->config_work); 3999 } 4000 4001 static void virtnet_free_queues(struct virtnet_info *vi) 4002 { 4003 int i; 4004 4005 for (i = 0; i < vi->max_queue_pairs; i++) { 4006 __netif_napi_del(&vi->rq[i].napi); 4007 __netif_napi_del(&vi->sq[i].napi); 4008 } 4009 4010 /* We called __netif_napi_del(), 4011 * we need to respect an RCU grace period before freeing vi->rq 4012 */ 4013 synchronize_net(); 4014 4015 kfree(vi->rq); 4016 kfree(vi->sq); 4017 kfree(vi->ctrl); 4018 } 4019 4020 static void _free_receive_bufs(struct virtnet_info *vi) 4021 { 4022 struct bpf_prog *old_prog; 4023 int i; 4024 4025 for (i = 0; i < vi->max_queue_pairs; i++) { 4026 while (vi->rq[i].pages) 4027 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 4028 4029 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 4030 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 4031 if (old_prog) 4032 bpf_prog_put(old_prog); 4033 } 4034 } 4035 4036 static void free_receive_bufs(struct virtnet_info *vi) 4037 { 4038 rtnl_lock(); 4039 _free_receive_bufs(vi); 4040 rtnl_unlock(); 4041 } 4042 4043 static void free_receive_page_frags(struct virtnet_info *vi) 4044 { 4045 int i; 4046 for (i = 0; i < vi->max_queue_pairs; i++) 4047 if (vi->rq[i].alloc_frag.page) { 4048 if (vi->rq[i].do_dma && vi->rq[i].last_dma) 4049 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); 4050 put_page(vi->rq[i].alloc_frag.page); 4051 } 4052 } 4053 4054 static void virtnet_sq_free_unused_buf(struct 
virtqueue *vq, void *buf) 4055 { 4056 if (!is_xdp_frame(buf)) 4057 dev_kfree_skb(buf); 4058 else 4059 xdp_return_frame(ptr_to_xdp(buf)); 4060 } 4061 4062 static void free_unused_bufs(struct virtnet_info *vi) 4063 { 4064 void *buf; 4065 int i; 4066 4067 for (i = 0; i < vi->max_queue_pairs; i++) { 4068 struct virtqueue *vq = vi->sq[i].vq; 4069 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 4070 virtnet_sq_free_unused_buf(vq, buf); 4071 cond_resched(); 4072 } 4073 4074 for (i = 0; i < vi->max_queue_pairs; i++) { 4075 struct virtqueue *vq = vi->rq[i].vq; 4076 4077 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 4078 virtnet_rq_unmap_free_buf(vq, buf); 4079 cond_resched(); 4080 } 4081 } 4082 4083 static void virtnet_del_vqs(struct virtnet_info *vi) 4084 { 4085 struct virtio_device *vdev = vi->vdev; 4086 4087 virtnet_clean_affinity(vi); 4088 4089 vdev->config->del_vqs(vdev); 4090 4091 virtnet_free_queues(vi); 4092 } 4093 4094 /* How large should a single buffer be so a queue full of these can fit at 4095 * least one full packet? 4096 * Logic below assumes the mergeable buffer header is used. 4097 */ 4098 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 4099 { 4100 const unsigned int hdr_len = vi->hdr_len; 4101 unsigned int rq_size = virtqueue_get_vring_size(vq); 4102 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 4103 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 4104 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 4105 4106 return max(max(min_buf_len, hdr_len) - hdr_len, 4107 (unsigned int)GOOD_PACKET_LEN); 4108 } 4109 4110 static int virtnet_find_vqs(struct virtnet_info *vi) 4111 { 4112 vq_callback_t **callbacks; 4113 struct virtqueue **vqs; 4114 const char **names; 4115 int ret = -ENOMEM; 4116 int total_vqs; 4117 bool *ctx; 4118 u16 i; 4119 4120 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 4121 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 4122 * possible control vq. 
4123 */ 4124 total_vqs = vi->max_queue_pairs * 2 + 4125 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 4126 4127 /* Allocate space for find_vqs parameters */ 4128 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 4129 if (!vqs) 4130 goto err_vq; 4131 callbacks = kmalloc_array(total_vqs, sizeof(*callbacks), GFP_KERNEL); 4132 if (!callbacks) 4133 goto err_callback; 4134 names = kmalloc_array(total_vqs, sizeof(*names), GFP_KERNEL); 4135 if (!names) 4136 goto err_names; 4137 if (!vi->big_packets || vi->mergeable_rx_bufs) { 4138 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 4139 if (!ctx) 4140 goto err_ctx; 4141 } else { 4142 ctx = NULL; 4143 } 4144 4145 /* Parameters for control virtqueue, if any */ 4146 if (vi->has_cvq) { 4147 callbacks[total_vqs - 1] = NULL; 4148 names[total_vqs - 1] = "control"; 4149 } 4150 4151 /* Allocate/initialize parameters for send/receive virtqueues */ 4152 for (i = 0; i < vi->max_queue_pairs; i++) { 4153 callbacks[rxq2vq(i)] = skb_recv_done; 4154 callbacks[txq2vq(i)] = skb_xmit_done; 4155 sprintf(vi->rq[i].name, "input.%u", i); 4156 sprintf(vi->sq[i].name, "output.%u", i); 4157 names[rxq2vq(i)] = vi->rq[i].name; 4158 names[txq2vq(i)] = vi->sq[i].name; 4159 if (ctx) 4160 ctx[rxq2vq(i)] = true; 4161 } 4162 4163 ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks, 4164 names, ctx, NULL); 4165 if (ret) 4166 goto err_find; 4167 4168 if (vi->has_cvq) { 4169 vi->cvq = vqs[total_vqs - 1]; 4170 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 4171 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 4172 } 4173 4174 for (i = 0; i < vi->max_queue_pairs; i++) { 4175 vi->rq[i].vq = vqs[rxq2vq(i)]; 4176 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 4177 vi->sq[i].vq = vqs[txq2vq(i)]; 4178 } 4179 4180 /* run here: ret == 0. */ 4181 4182 4183 err_find: 4184 kfree(ctx); 4185 err_ctx: 4186 kfree(names); 4187 err_names: 4188 kfree(callbacks); 4189 err_callback: 4190 kfree(vqs); 4191 err_vq: 4192 return ret; 4193 } 4194 4195 static int virtnet_alloc_queues(struct virtnet_info *vi) 4196 { 4197 int i; 4198 4199 if (vi->has_cvq) { 4200 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 4201 if (!vi->ctrl) 4202 goto err_ctrl; 4203 } else { 4204 vi->ctrl = NULL; 4205 } 4206 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 4207 if (!vi->sq) 4208 goto err_sq; 4209 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 4210 if (!vi->rq) 4211 goto err_rq; 4212 4213 INIT_DELAYED_WORK(&vi->refill, refill_work); 4214 for (i = 0; i < vi->max_queue_pairs; i++) { 4215 vi->rq[i].pages = NULL; 4216 netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll, 4217 napi_weight); 4218 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 4219 virtnet_poll_tx, 4220 napi_tx ? 
napi_weight : 0); 4221 4222 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 4223 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 4224 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 4225 4226 u64_stats_init(&vi->rq[i].stats.syncp); 4227 u64_stats_init(&vi->sq[i].stats.syncp); 4228 } 4229 4230 return 0; 4231 4232 err_rq: 4233 kfree(vi->sq); 4234 err_sq: 4235 kfree(vi->ctrl); 4236 err_ctrl: 4237 return -ENOMEM; 4238 } 4239 4240 static int init_vqs(struct virtnet_info *vi) 4241 { 4242 int ret; 4243 4244 /* Allocate send & receive queues */ 4245 ret = virtnet_alloc_queues(vi); 4246 if (ret) 4247 goto err; 4248 4249 ret = virtnet_find_vqs(vi); 4250 if (ret) 4251 goto err_free; 4252 4253 virtnet_rq_set_premapped(vi); 4254 4255 cpus_read_lock(); 4256 virtnet_set_affinity(vi); 4257 cpus_read_unlock(); 4258 4259 return 0; 4260 4261 err_free: 4262 virtnet_free_queues(vi); 4263 err: 4264 return ret; 4265 } 4266 4267 #ifdef CONFIG_SYSFS 4268 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 4269 char *buf) 4270 { 4271 struct virtnet_info *vi = netdev_priv(queue->dev); 4272 unsigned int queue_index = get_netdev_rx_queue_index(queue); 4273 unsigned int headroom = virtnet_get_headroom(vi); 4274 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 4275 struct ewma_pkt_len *avg; 4276 4277 BUG_ON(queue_index >= vi->max_queue_pairs); 4278 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 4279 return sprintf(buf, "%u\n", 4280 get_mergeable_buf_len(&vi->rq[queue_index], avg, 4281 SKB_DATA_ALIGN(headroom + tailroom))); 4282 } 4283 4284 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 4285 __ATTR_RO(mergeable_rx_buffer_size); 4286 4287 static struct attribute *virtio_net_mrg_rx_attrs[] = { 4288 &mergeable_rx_buffer_size_attribute.attr, 4289 NULL 4290 }; 4291 4292 static const struct attribute_group virtio_net_mrg_rx_group = { 4293 .name = "virtio_net", 4294 .attrs = virtio_net_mrg_rx_attrs 4295 }; 4296 #endif 4297 4298 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 4299 unsigned int fbit, 4300 const char *fname, const char *dname) 4301 { 4302 if (!virtio_has_feature(vdev, fbit)) 4303 return false; 4304 4305 dev_err(&vdev->dev, "device advertises feature %s but not %s", 4306 fname, dname); 4307 4308 return true; 4309 } 4310 4311 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 4312 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 4313 4314 static bool virtnet_validate_features(struct virtio_device *vdev) 4315 { 4316 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 4317 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 4318 "VIRTIO_NET_F_CTRL_VQ") || 4319 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 4320 "VIRTIO_NET_F_CTRL_VQ") || 4321 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 4322 "VIRTIO_NET_F_CTRL_VQ") || 4323 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 4324 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 4325 "VIRTIO_NET_F_CTRL_VQ") || 4326 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 4327 "VIRTIO_NET_F_CTRL_VQ") || 4328 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 4329 "VIRTIO_NET_F_CTRL_VQ") || 4330 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 4331 "VIRTIO_NET_F_CTRL_VQ") || 4332 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, 4333 "VIRTIO_NET_F_CTRL_VQ"))) { 4334 return false; 4335 } 4336 4337 return true; 4338 } 4339 4340 #define MIN_MTU ETH_MIN_MTU 4341 #define MAX_MTU ETH_MAX_MTU 4342 4343 static int virtnet_validate(struct virtio_device *vdev) 4344 { 4345 if (!vdev->config->get) { 4346 
dev_err(&vdev->dev, "%s failure: config access disabled\n", 4347 __func__); 4348 return -EINVAL; 4349 } 4350 4351 if (!virtnet_validate_features(vdev)) 4352 return -EINVAL; 4353 4354 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 4355 int mtu = virtio_cread16(vdev, 4356 offsetof(struct virtio_net_config, 4357 mtu)); 4358 if (mtu < MIN_MTU) 4359 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 4360 } 4361 4362 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && 4363 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 4364 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); 4365 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); 4366 } 4367 4368 return 0; 4369 } 4370 4371 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 4372 { 4373 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 4374 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 4375 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 4376 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 4377 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 4378 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 4379 } 4380 4381 static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) 4382 { 4383 bool guest_gso = virtnet_check_guest_gso(vi); 4384 4385 /* If device can receive ANY guest GSO packets, regardless of mtu, 4386 * allocate packets of maximum size, otherwise limit it to only 4387 * mtu size worth only. 4388 */ 4389 if (mtu > ETH_DATA_LEN || guest_gso) { 4390 vi->big_packets = true; 4391 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); 4392 } 4393 } 4394 4395 static int virtnet_probe(struct virtio_device *vdev) 4396 { 4397 int i, err = -ENOMEM; 4398 struct net_device *dev; 4399 struct virtnet_info *vi; 4400 u16 max_queue_pairs; 4401 int mtu = 0; 4402 4403 /* Find if host supports multiqueue/rss virtio_net device */ 4404 max_queue_pairs = 1; 4405 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 4406 max_queue_pairs = 4407 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); 4408 4409 /* We need at least 2 queue's */ 4410 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 4411 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 4412 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 4413 max_queue_pairs = 1; 4414 4415 /* Allocate ourselves a network device with room for our info */ 4416 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); 4417 if (!dev) 4418 return -ENOMEM; 4419 4420 /* Set up network device as normal. */ 4421 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | 4422 IFF_TX_SKB_NO_LINEAR; 4423 dev->netdev_ops = &virtnet_netdev; 4424 dev->features = NETIF_F_HIGHDMA; 4425 4426 dev->ethtool_ops = &virtnet_ethtool_ops; 4427 SET_NETDEV_DEV(dev, &vdev->dev); 4428 4429 /* Do we support "hardware" checksums? */ 4430 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 4431 /* This opens up the world of extra features. */ 4432 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 4433 if (csum) 4434 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 4435 4436 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 4437 dev->hw_features |= NETIF_F_TSO 4438 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 4439 } 4440 /* Individual feature bits: what can host handle? 
*/ 4441 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 4442 dev->hw_features |= NETIF_F_TSO; 4443 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 4444 dev->hw_features |= NETIF_F_TSO6; 4445 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 4446 dev->hw_features |= NETIF_F_TSO_ECN; 4447 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) 4448 dev->hw_features |= NETIF_F_GSO_UDP_L4; 4449 4450 dev->features |= NETIF_F_GSO_ROBUST; 4451 4452 if (gso) 4453 dev->features |= dev->hw_features & NETIF_F_ALL_TSO; 4454 /* (!csum && gso) case will be fixed by register_netdev() */ 4455 } 4456 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM)) 4457 dev->features |= NETIF_F_RXCSUM; 4458 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 4459 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 4460 dev->features |= NETIF_F_GRO_HW; 4461 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 4462 dev->hw_features |= NETIF_F_GRO_HW; 4463 4464 dev->vlan_features = dev->features; 4465 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; 4466 4467 /* MTU range: 68 - 65535 */ 4468 dev->min_mtu = MIN_MTU; 4469 dev->max_mtu = MAX_MTU; 4470 4471 /* Configuration may specify what MAC to use. Otherwise random. */ 4472 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 4473 u8 addr[ETH_ALEN]; 4474 4475 virtio_cread_bytes(vdev, 4476 offsetof(struct virtio_net_config, mac), 4477 addr, ETH_ALEN); 4478 eth_hw_addr_set(dev, addr); 4479 } else { 4480 eth_hw_addr_random(dev); 4481 dev_info(&vdev->dev, "Assigned random MAC address %pM\n", 4482 dev->dev_addr); 4483 } 4484 4485 /* Set up our device-specific information */ 4486 vi = netdev_priv(dev); 4487 vi->dev = dev; 4488 vi->vdev = vdev; 4489 vdev->priv = vi; 4490 4491 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 4492 spin_lock_init(&vi->refill_lock); 4493 4494 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { 4495 vi->mergeable_rx_bufs = true; 4496 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; 4497 } 4498 4499 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 4500 vi->intr_coal_rx.max_usecs = 0; 4501 vi->intr_coal_tx.max_usecs = 0; 4502 vi->intr_coal_tx.max_packets = 0; 4503 vi->intr_coal_rx.max_packets = 0; 4504 } 4505 4506 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) 4507 vi->has_rss_hash_report = true; 4508 4509 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { 4510 vi->has_rss = true; 4511 4512 vi->rss_indir_table_size = 4513 virtio_cread16(vdev, offsetof(struct virtio_net_config, 4514 rss_max_indirection_table_length)); 4515 } 4516 4517 if (vi->has_rss || vi->has_rss_hash_report) { 4518 vi->rss_key_size = 4519 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); 4520 4521 vi->rss_hash_types_supported = 4522 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); 4523 vi->rss_hash_types_supported &= 4524 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | 4525 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 4526 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); 4527 4528 dev->hw_features |= NETIF_F_RXHASH; 4529 } 4530 4531 if (vi->has_rss_hash_report) 4532 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); 4533 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 4534 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 4535 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 4536 else 4537 vi->hdr_len = sizeof(struct virtio_net_hdr); 4538 4539 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 4540 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 4541 vi->any_header_sg = true; 4542 
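	/* Note on the choices made just above: vi->hdr_len must match the
	 * header layout negotiated with the device (virtio_net_hdr_v1_hash
	 * when hash reports were negotiated, virtio_net_hdr_mrg_rxbuf for
	 * mergeable buffers or VIRTIO 1.0 devices, the legacy virtio_net_hdr
	 * otherwise); the receive path uses the same length to size buffers
	 * and strip the header.  any_header_sg records that the device
	 * tolerates the header sharing a scatterlist element with packet
	 * data instead of requiring a dedicated one.
	 */
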
	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
		vi->has_cvq = true;

	if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
		mtu = virtio_cread16(vdev,
				     offsetof(struct virtio_net_config,
					      mtu));
		if (mtu < dev->min_mtu) {
			/* Should never trigger: MTU was previously validated
			 * in virtnet_validate.
			 */
			dev_err(&vdev->dev,
				"device MTU appears to have changed, it is now %d < %d",
				mtu, dev->min_mtu);
			err = -EINVAL;
			goto free;
		}

		dev->mtu = mtu;
		dev->max_mtu = mtu;
	}

	virtnet_set_big_packets(vi, mtu);

	if (vi->any_header_sg)
		dev->needed_headroom = vi->hdr_len;

	/* Enable multiqueue by default */
	if (num_online_cpus() >= max_queue_pairs)
		vi->curr_queue_pairs = max_queue_pairs;
	else
		vi->curr_queue_pairs = num_online_cpus();
	vi->max_queue_pairs = max_queue_pairs;

	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
	err = init_vqs(vi);
	if (err)
		goto free;

#ifdef CONFIG_SYSFS
	if (vi->mergeable_rx_bufs)
		dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
#endif
	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);

	virtnet_init_settings(dev);

	if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
		vi->failover = net_failover_create(vi->dev);
		if (IS_ERR(vi->failover)) {
			err = PTR_ERR(vi->failover);
			goto free_vqs;
		}
	}

	if (vi->has_rss || vi->has_rss_hash_report)
		virtnet_init_default_rss(vi);

	/* serialize netdev register + virtio_device_ready() with ndo_open() */
	rtnl_lock();

	err = register_netdevice(dev);
	if (err) {
		pr_debug("virtio_net: registering device failed\n");
		rtnl_unlock();
		goto free_failover;
	}

	virtio_device_ready(vdev);

	_virtnet_set_queues(vi, vi->curr_queue_pairs);

	/* A random MAC address has been assigned; notify the device.
	 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there
	 * because many devices work fine without getting a MAC explicitly.
	 */
	if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
		struct scatterlist sg;

		sg_init_one(&sg, dev->dev_addr, dev->addr_len);
		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
					  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
			pr_debug("virtio_net: setting MAC address failed\n");
			rtnl_unlock();
			err = -EINVAL;
			goto free_unregister_netdev;
		}
	}

	rtnl_unlock();

	err = virtnet_cpu_notif_add(vi);
	if (err) {
		pr_debug("virtio_net: registering cpu notifier failed\n");
		goto free_unregister_netdev;
	}

	/* Assume link up if device can't report link status,
	 * otherwise get link status from config.
	 */
	netif_carrier_off(dev);
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
		schedule_work(&vi->config_work);
	} else {
		vi->status = VIRTIO_NET_S_LINK_UP;
		virtnet_update_settings(vi);
		netif_carrier_on(dev);
	}

	for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
		if (virtio_has_feature(vi->vdev, guest_offloads[i]))
			set_bit(guest_offloads[i], &vi->guest_offloads);
	vi->guest_offloads_capable = vi->guest_offloads;

	pr_debug("virtnet: registered device %s with %d RX and TX vqs\n",
		 dev->name, max_queue_pairs);

	return 0;

free_unregister_netdev:
	unregister_netdev(dev);
free_failover:
	net_failover_destroy(vi->failover);
free_vqs:
	virtio_reset_device(vdev);
	cancel_delayed_work_sync(&vi->refill);
	free_receive_page_frags(vi);
	virtnet_del_vqs(vi);
free:
	free_netdev(dev);
	return err;
}

static void remove_vq_common(struct virtnet_info *vi)
{
	virtio_reset_device(vi->vdev);

	/* Free unused buffers in both send and recv, if any. */
	free_unused_bufs(vi);

	free_receive_bufs(vi);

	free_receive_page_frags(vi);

	virtnet_del_vqs(vi);
}

static void virtnet_remove(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	virtnet_cpu_notif_remove(vi);

	/* Make sure no work handler is accessing the device. */
	flush_work(&vi->config_work);

	unregister_netdev(vi->dev);

	net_failover_destroy(vi->failover);

	remove_vq_common(vi);

	free_netdev(vi->dev);
}

static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	virtnet_cpu_notif_remove(vi);
	virtnet_freeze_down(vdev);
	remove_vq_common(vi);

	return 0;
}

static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;
	int err;

	err = virtnet_restore_up(vdev);
	if (err)
		return err;
	virtnet_set_queues(vi, vi->curr_queue_pairs);

	err = virtnet_cpu_notif_add(vi);
	if (err) {
		virtnet_freeze_down(vdev);
		remove_vq_common(vi);
		return err;
	}

	return 0;
}

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

#define VIRTNET_FEATURES \
	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
	VIRTIO_NET_F_MAC, \
	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
	VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \
	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
	VIRTIO_NET_F_CTRL_MAC_ADDR, \
	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \
	VIRTIO_NET_F_VQ_NOTF_COAL, \
	VIRTIO_NET_F_GUEST_HDRLEN

static unsigned int features[] = {
	VIRTNET_FEATURES,
};

static unsigned int features_legacy[] = {
	VIRTNET_FEATURES,
	VIRTIO_NET_F_GSO,
	VIRTIO_F_ANY_LAYOUT,
4770 }; 4771 4772 static struct virtio_driver virtio_net_driver = { 4773 .feature_table = features, 4774 .feature_table_size = ARRAY_SIZE(features), 4775 .feature_table_legacy = features_legacy, 4776 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 4777 .driver.name = KBUILD_MODNAME, 4778 .driver.owner = THIS_MODULE, 4779 .id_table = id_table, 4780 .validate = virtnet_validate, 4781 .probe = virtnet_probe, 4782 .remove = virtnet_remove, 4783 .config_changed = virtnet_config_changed, 4784 #ifdef CONFIG_PM_SLEEP 4785 .freeze = virtnet_freeze, 4786 .restore = virtnet_restore, 4787 #endif 4788 }; 4789 4790 static __init int virtio_net_driver_init(void) 4791 { 4792 int ret; 4793 4794 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", 4795 virtnet_cpu_online, 4796 virtnet_cpu_down_prep); 4797 if (ret < 0) 4798 goto out; 4799 virtionet_online = ret; 4800 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", 4801 NULL, virtnet_cpu_dead); 4802 if (ret) 4803 goto err_dead; 4804 ret = register_virtio_driver(&virtio_net_driver); 4805 if (ret) 4806 goto err_virtio; 4807 return 0; 4808 err_virtio: 4809 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 4810 err_dead: 4811 cpuhp_remove_multi_state(virtionet_online); 4812 out: 4813 return ret; 4814 } 4815 module_init(virtio_net_driver_init); 4816 4817 static __exit void virtio_net_driver_exit(void) 4818 { 4819 unregister_virtio_driver(&virtio_net_driver); 4820 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 4821 cpuhp_remove_multi_state(virtionet_online); 4822 } 4823 module_exit(virtio_net_driver_exit); 4824 4825 MODULE_DEVICE_TABLE(virtio, id_table); 4826 MODULE_DESCRIPTION("Virtio network driver"); 4827 MODULE_LICENSE("GPL"); 4828
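
/* Registration order matters here: both CPU hotplug states are set up in
 * virtio_net_driver_init() before register_virtio_driver(), so that
 * virtnet_cpu_notif_add(), called from virtnet_probe(), can attach per-device
 * instances to an already-registered state.  virtio_net_driver_exit() and the
 * init error labels unwind the same three steps in reverse order.
 */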