xref: /openbmc/linux/drivers/net/virtio_net.c (revision 77ab8d5d)
1 /* A network driver using virtio.
2  *
3  * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, see <http://www.gnu.org/licenses/>.
17  */
18 //#define DEBUG
19 #include <linux/netdevice.h>
20 #include <linux/etherdevice.h>
21 #include <linux/ethtool.h>
22 #include <linux/module.h>
23 #include <linux/virtio.h>
24 #include <linux/virtio_net.h>
25 #include <linux/bpf.h>
26 #include <linux/bpf_trace.h>
27 #include <linux/scatterlist.h>
28 #include <linux/if_vlan.h>
29 #include <linux/slab.h>
30 #include <linux/cpu.h>
31 #include <linux/average.h>
32 #include <linux/filter.h>
33 #include <linux/netdevice.h>
34 #include <linux/pci.h>
35 #include <net/route.h>
36 #include <net/xdp.h>
37 #include <net/net_failover.h>
38 
39 static int napi_weight = NAPI_POLL_WEIGHT;
40 module_param(napi_weight, int, 0444);
41 
42 static bool csum = true, gso = true, napi_tx;
43 module_param(csum, bool, 0444);
44 module_param(gso, bool, 0444);
45 module_param(napi_tx, bool, 0644);
46 
47 /* FIXME: MTU in config. */
48 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
49 #define GOOD_COPY_LEN	128
50 
51 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
52 
53 /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
54 #define VIRTIO_XDP_HEADROOM 256
55 
56 /* RX packet size EWMA. The average packet size is used to determine the packet
57  * buffer size when refilling RX rings. As the entire RX ring may be refilled
58  * at once, the weight is chosen so that the EWMA will be insensitive to short-
59  * term, transient changes in packet size.
60  */
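/* DECLARE_EWMA(pkt_len, 0, 64): precision 0, weight reciprocal 64, so each
 * new sample moves the running average by roughly 1/64 of its distance.
 */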
61 DECLARE_EWMA(pkt_len, 0, 64)
62 
63 #define VIRTNET_DRIVER_VERSION "1.0.0"
64 
65 static const unsigned long guest_offloads[] = {
66 	VIRTIO_NET_F_GUEST_TSO4,
67 	VIRTIO_NET_F_GUEST_TSO6,
68 	VIRTIO_NET_F_GUEST_ECN,
69 	VIRTIO_NET_F_GUEST_UFO
70 };
71 
72 struct virtnet_stat_desc {
73 	char desc[ETH_GSTRING_LEN];
74 	size_t offset;
75 };
76 
77 struct virtnet_sq_stats {
78 	struct u64_stats_sync syncp;
79 	u64 packets;
80 	u64 bytes;
81 };
82 
83 struct virtnet_rq_stats {
84 	struct u64_stats_sync syncp;
85 	u64 packets;
86 	u64 bytes;
87 };
88 
89 #define VIRTNET_SQ_STAT(m)	offsetof(struct virtnet_sq_stats, m)
90 #define VIRTNET_RQ_STAT(m)	offsetof(struct virtnet_rq_stats, m)
91 
92 static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
93 	{ "packets",	VIRTNET_SQ_STAT(packets) },
94 	{ "bytes",	VIRTNET_SQ_STAT(bytes) },
95 };
96 
97 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
98 	{ "packets",	VIRTNET_RQ_STAT(packets) },
99 	{ "bytes",	VIRTNET_RQ_STAT(bytes) },
100 };
101 
102 #define VIRTNET_SQ_STATS_LEN	ARRAY_SIZE(virtnet_sq_stats_desc)
103 #define VIRTNET_RQ_STATS_LEN	ARRAY_SIZE(virtnet_rq_stats_desc)
104 
105 /* Internal representation of a send virtqueue */
106 struct send_queue {
107 	/* Virtqueue associated with this send_queue */
108 	struct virtqueue *vq;
109 
110 	/* TX: fragments + linear part + virtio header */
111 	struct scatterlist sg[MAX_SKB_FRAGS + 2];
112 
113 	/* Name of the send queue: output.$index */
114 	char name[40];
115 
116 	struct virtnet_sq_stats stats;
117 
118 	struct napi_struct napi;
119 };
120 
121 /* Internal representation of a receive virtqueue */
122 struct receive_queue {
123 	/* Virtqueue associated with this receive_queue */
124 	struct virtqueue *vq;
125 
126 	struct napi_struct napi;
127 
128 	struct bpf_prog __rcu *xdp_prog;
129 
130 	struct virtnet_rq_stats stats;
131 
132 	/* Chain pages by the private ptr. */
133 	struct page *pages;
134 
135 	/* Average packet length for mergeable receive buffers. */
136 	struct ewma_pkt_len mrg_avg_pkt_len;
137 
138 	/* Page frag for packet buffer allocation. */
139 	struct page_frag alloc_frag;
140 
141 	/* RX: fragments + linear part + virtio header */
142 	struct scatterlist sg[MAX_SKB_FRAGS + 2];
143 
144 	/* Min single buffer size for mergeable buffers case. */
145 	unsigned int min_buf_len;
146 
147 	/* Name of this receive queue: input.$index */
148 	char name[40];
149 
150 	struct xdp_rxq_info xdp_rxq;
151 };
152 
153 /* Control VQ buffers: protected by the rtnl lock */
154 struct control_buf {
155 	struct virtio_net_ctrl_hdr hdr;
156 	virtio_net_ctrl_ack status;
157 	struct virtio_net_ctrl_mq mq;
158 	u8 promisc;
159 	u8 allmulti;
160 	__virtio16 vid;
161 	__virtio64 offloads;
162 };
163 
164 struct virtnet_info {
165 	struct virtio_device *vdev;
166 	struct virtqueue *cvq;
167 	struct net_device *dev;
168 	struct send_queue *sq;
169 	struct receive_queue *rq;
170 	unsigned int status;
171 
172 	/* Max # of queue pairs supported by the device */
173 	u16 max_queue_pairs;
174 
175 	/* # of queue pairs currently used by the driver */
176 	u16 curr_queue_pairs;
177 
178 	/* # of XDP queue pairs currently used by the driver */
179 	u16 xdp_queue_pairs;
180 
181 	/* I like... big packets and I cannot lie! */
182 	bool big_packets;
183 
184 	/* Host will merge rx buffers for big packets (shake it! shake it!) */
185 	bool mergeable_rx_bufs;
186 
187 	/* Has control virtqueue */
188 	bool has_cvq;
189 
190 	/* Host can handle any s/g split between our header and packet data */
191 	bool any_header_sg;
192 
193 	/* Packet virtio header size */
194 	u8 hdr_len;
195 
196 	/* Work struct for refilling if we run low on memory. */
197 	struct delayed_work refill;
198 
199 	/* Work struct for config space updates */
200 	struct work_struct config_work;
201 
202 	/* Is the affinity hint set for virtqueues? */
203 	bool affinity_hint_set;
204 
205 	/* CPU hotplug instances for online & dead */
206 	struct hlist_node node;
207 	struct hlist_node node_dead;
208 
209 	struct control_buf *ctrl;
210 
211 	/* Ethtool settings */
212 	u8 duplex;
213 	u32 speed;
214 
215 	unsigned long guest_offloads;
216 
217 	/* failover when STANDBY feature enabled */
218 	struct failover *failover;
219 };
220 
221 struct padded_vnet_hdr {
222 	struct virtio_net_hdr_mrg_rxbuf hdr;
223 	/*
224 	 * hdr is in a separate sg buffer, and data sg buffer shares same page
225 	 * with this header sg. This padding makes next sg 16 byte aligned
226 	 * after the header.
227 	 */
228 	char padding[4];
229 };
230 
231 /* Converting between virtqueue no. and kernel tx/rx queue no.
232  * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
233  */
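/* e.g. with two queue pairs: vq0=rx0, vq1=tx0, vq2=rx1, vq3=tx1, vq4=cvq */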
234 static int vq2txq(struct virtqueue *vq)
235 {
236 	return (vq->index - 1) / 2;
237 }
238 
239 static int txq2vq(int txq)
240 {
241 	return txq * 2 + 1;
242 }
243 
244 static int vq2rxq(struct virtqueue *vq)
245 {
246 	return vq->index / 2;
247 }
248 
249 static int rxq2vq(int rxq)
250 {
251 	return rxq * 2;
252 }
253 
254 static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)
255 {
256 	return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;
257 }
258 
259 /*
260  * page->private is used to chain pages for big packets; put the whole
261  * most recently used list at the beginning for reuse.
262  */
263 static void give_pages(struct receive_queue *rq, struct page *page)
264 {
265 	struct page *end;
266 
267 	/* Find end of list, sew whole thing into vi->rq.pages. */
268 	for (end = page; end->private; end = (struct page *)end->private);
269 	end->private = (unsigned long)rq->pages;
270 	rq->pages = page;
271 }
272 
273 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
274 {
275 	struct page *p = rq->pages;
276 
277 	if (p) {
278 		rq->pages = (struct page *)p->private;
279 		/* clear private here; it is used to chain pages */
280 		p->private = 0;
281 	} else
282 		p = alloc_page(gfp_mask);
283 	return p;
284 }
285 
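/* Schedule NAPI for a virtqueue with its callbacks (interrupts) suppressed;
 * virtqueue_napi_complete() re-arms them once polling is done.
 */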
286 static void virtqueue_napi_schedule(struct napi_struct *napi,
287 				    struct virtqueue *vq)
288 {
289 	if (napi_schedule_prep(napi)) {
290 		virtqueue_disable_cb(vq);
291 		__napi_schedule(napi);
292 	}
293 }
294 
295 static void virtqueue_napi_complete(struct napi_struct *napi,
296 				    struct virtqueue *vq, int processed)
297 {
298 	int opaque;
299 
300 	opaque = virtqueue_enable_cb_prepare(vq);
301 	if (napi_complete_done(napi, processed)) {
302 		if (unlikely(virtqueue_poll(vq, opaque)))
303 			virtqueue_napi_schedule(napi, vq);
304 	} else {
305 		virtqueue_disable_cb(vq);
306 	}
307 }
308 
309 static void skb_xmit_done(struct virtqueue *vq)
310 {
311 	struct virtnet_info *vi = vq->vdev->priv;
312 	struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;
313 
314 	/* Suppress further interrupts. */
315 	virtqueue_disable_cb(vq);
316 
317 	if (napi->weight)
318 		virtqueue_napi_schedule(napi, vq);
319 	else
320 		/* We were probably waiting for more output buffers. */
321 		netif_wake_subqueue(vi->dev, vq2txq(vq));
322 }
323 
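/* A mergeable rx buffer's context packs two values into one pointer-sized
 * word: the headroom in the bits above MRG_CTX_HEADER_SHIFT and the buffer
 * truesize in the low 22 bits, avoiding a separate per-buffer allocation.
 */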
324 #define MRG_CTX_HEADER_SHIFT 22
325 static void *mergeable_len_to_ctx(unsigned int truesize,
326 				  unsigned int headroom)
327 {
328 	return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
329 }
330 
331 static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
332 {
333 	return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
334 }
335 
336 static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
337 {
338 	return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
339 }
340 
341 /* Called from bottom half context */
342 static struct sk_buff *page_to_skb(struct virtnet_info *vi,
343 				   struct receive_queue *rq,
344 				   struct page *page, unsigned int offset,
345 				   unsigned int len, unsigned int truesize)
346 {
347 	struct sk_buff *skb;
348 	struct virtio_net_hdr_mrg_rxbuf *hdr;
349 	unsigned int copy, hdr_len, hdr_padded_len;
350 	char *p;
351 
352 	p = page_address(page) + offset;
353 
354 	/* copy small packet so we can reuse these pages for small data */
355 	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
356 	if (unlikely(!skb))
357 		return NULL;
358 
359 	hdr = skb_vnet_hdr(skb);
360 
361 	hdr_len = vi->hdr_len;
362 	if (vi->mergeable_rx_bufs)
363 		hdr_padded_len = sizeof(*hdr);
364 	else
365 		hdr_padded_len = sizeof(struct padded_vnet_hdr);
366 
367 	memcpy(hdr, p, hdr_len);
368 
369 	len -= hdr_len;
370 	offset += hdr_padded_len;
371 	p += hdr_padded_len;
372 
373 	copy = len;
374 	if (copy > skb_tailroom(skb))
375 		copy = skb_tailroom(skb);
376 	skb_put_data(skb, p, copy);
377 
378 	len -= copy;
379 	offset += copy;
380 
381 	if (vi->mergeable_rx_bufs) {
382 		if (len)
383 			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
384 		else
385 			put_page(page);
386 		return skb;
387 	}
388 
389 	/*
390 	 * Verify that we can indeed put this data into a skb.
391 	 * This is here to handle cases when the device erroneously
392 	 * tries to receive more than is possible. This is usually
393 	 * the case of a broken device.
394 	 */
395 	if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
396 		net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
397 		dev_kfree_skb(skb);
398 		return NULL;
399 	}
400 	BUG_ON(offset >= PAGE_SIZE);
401 	while (len) {
402 		unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
403 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
404 				frag_size, truesize);
405 		len -= frag_size;
406 		page = (struct page *)page->private;
407 		offset = 0;
408 	}
409 
410 	if (page)
411 		give_pages(rq, page);
412 
413 	return skb;
414 }
415 
416 static void virtnet_xdp_flush(struct net_device *dev)
417 {
418 	struct virtnet_info *vi = netdev_priv(dev);
419 	struct send_queue *sq;
420 	unsigned int qp;
421 
422 	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
423 	sq = &vi->sq[qp];
424 
425 	virtqueue_kick(sq->vq);
426 }
427 
428 static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
429 				   struct send_queue *sq,
430 				   struct xdp_frame *xdpf)
431 {
432 	struct virtio_net_hdr_mrg_rxbuf *hdr;
433 	int err;
434 
435 	/* The virtqueue wants to use the data area in front of the packet */
436 	if (unlikely(xdpf->metasize > 0))
437 		return -EOPNOTSUPP;
438 
439 	if (unlikely(xdpf->headroom < vi->hdr_len))
440 		return -EOVERFLOW;
441 
442 	/* Make room for virtqueue hdr (also change xdpf->headroom?) */
443 	xdpf->data -= vi->hdr_len;
444 	/* Zero header and leave csum up to XDP layers */
445 	hdr = xdpf->data;
446 	memset(hdr, 0, vi->hdr_len);
447 	xdpf->len   += vi->hdr_len;
448 
449 	sg_init_one(sq->sg, xdpf->data, xdpf->len);
450 
451 	err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC);
452 	if (unlikely(err))
453 		return -ENOSPC; /* Caller handles free/refcnt */
454 
455 	return 0;
456 }
457 
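/* Transmit a single XDP frame on this cpu's XDP send queue, reclaiming any
 * previously completed frames first.
 */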
458 static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
459 				   struct xdp_frame *xdpf)
460 {
461 	struct xdp_frame *xdpf_sent;
462 	struct send_queue *sq;
463 	unsigned int len;
464 	unsigned int qp;
465 
466 	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
467 	sq = &vi->sq[qp];
468 
469 	/* Free up any pending old buffers before queueing new ones. */
470 	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
471 		xdp_return_frame(xdpf_sent);
472 
473 	return __virtnet_xdp_xmit_one(vi, sq, xdpf);
474 }
475 
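/* ndo_xdp_xmit: transmit a batch of redirected XDP frames on this cpu's XDP
 * send queue. Frames that cannot be queued are dropped; the number of frames
 * actually accepted is returned.
 */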
476 static int virtnet_xdp_xmit(struct net_device *dev,
477 			    int n, struct xdp_frame **frames)
478 {
479 	struct virtnet_info *vi = netdev_priv(dev);
480 	struct receive_queue *rq = vi->rq;
481 	struct xdp_frame *xdpf_sent;
482 	struct bpf_prog *xdp_prog;
483 	struct send_queue *sq;
484 	unsigned int len;
485 	unsigned int qp;
486 	int drops = 0;
487 	int err;
488 	int i;
489 
490 	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
491 	sq = &vi->sq[qp];
492 
493 	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
494 	 * indicates that XDP resources have been successfully allocated.
495 	 */
496 	xdp_prog = rcu_dereference(rq->xdp_prog);
497 	if (!xdp_prog)
498 		return -ENXIO;
499 
500 	/* Free up any pending old buffers before queueing new ones. */
501 	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
502 		xdp_return_frame(xdpf_sent);
503 
504 	for (i = 0; i < n; i++) {
505 		struct xdp_frame *xdpf = frames[i];
506 
507 		err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
508 		if (err) {
509 			xdp_return_frame_rx_napi(xdpf);
510 			drops++;
511 		}
512 	}
513 	return n - drops;
514 }
515 
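/* Reserve XDP headroom on rx buffers only while XDP queue pairs are
 * configured, so attached programs can use bpf_xdp_adjust_head().
 */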
516 static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
517 {
518 	return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
519 }
520 
521 /* We copy the packet for XDP in the following cases:
522  *
523  * 1) Packet is scattered across multiple rx buffers.
524  * 2) Headroom space is insufficient.
525  *
526  * This is inefficient but it's a temporary condition that
527  * we hit right after XDP is enabled and until the queue is refilled
528  * with large buffers with sufficient headroom - so it should affect
529  * at most a queue's worth of packets.
530  * Afterwards, the conditions to enable
531  * XDP should preclude the underlying device from sending packets
532  * across multiple buffers (num_buf > 1), and we make sure buffers
533  * have enough headroom.
534  */
535 static struct page *xdp_linearize_page(struct receive_queue *rq,
536 				       u16 *num_buf,
537 				       struct page *p,
538 				       int offset,
539 				       int page_off,
540 				       unsigned int *len)
541 {
542 	struct page *page = alloc_page(GFP_ATOMIC);
543 
544 	if (!page)
545 		return NULL;
546 
547 	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
548 	page_off += *len;
549 
550 	while (--*num_buf) {
551 		int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
552 		unsigned int buflen;
553 		void *buf;
554 		int off;
555 
556 		buf = virtqueue_get_buf(rq->vq, &buflen);
557 		if (unlikely(!buf))
558 			goto err_buf;
559 
560 		p = virt_to_head_page(buf);
561 		off = buf - page_address(p);
562 
563 		/* guard against a misconfigured or uncooperative backend that
564 		 * is sending packets larger than the MTU.
565 		 */
566 		if ((page_off + buflen + tailroom) > PAGE_SIZE) {
567 			put_page(p);
568 			goto err_buf;
569 		}
570 
571 		memcpy(page_address(page) + page_off,
572 		       page_address(p) + off, buflen);
573 		page_off += buflen;
574 		put_page(p);
575 	}
576 
577 	/* Headroom does not contribute to packet length */
578 	*len = page_off - VIRTIO_XDP_HEADROOM;
579 	return page;
580 err_buf:
581 	__free_pages(page, 0);
582 	return NULL;
583 }
584 
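/* Receive path for the default small-buffer layout: run the attached XDP
 * program, if any, then build an skb directly around the page-frag buffer.
 */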
585 static struct sk_buff *receive_small(struct net_device *dev,
586 				     struct virtnet_info *vi,
587 				     struct receive_queue *rq,
588 				     void *buf, void *ctx,
589 				     unsigned int len,
590 				     bool *xdp_xmit)
591 {
592 	struct sk_buff *skb;
593 	struct bpf_prog *xdp_prog;
594 	unsigned int xdp_headroom = (unsigned long)ctx;
595 	unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
596 	unsigned int headroom = vi->hdr_len + header_offset;
597 	unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
598 			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
599 	struct page *page = virt_to_head_page(buf);
600 	unsigned int delta = 0;
601 	struct page *xdp_page;
602 	int err;
603 
604 	len -= vi->hdr_len;
605 
606 	rcu_read_lock();
607 	xdp_prog = rcu_dereference(rq->xdp_prog);
608 	if (xdp_prog) {
609 		struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
610 		struct xdp_frame *xdpf;
611 		struct xdp_buff xdp;
612 		void *orig_data;
613 		u32 act;
614 
615 		if (unlikely(hdr->hdr.gso_type))
616 			goto err_xdp;
617 
618 		if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
619 			int offset = buf - page_address(page) + header_offset;
620 			unsigned int tlen = len + vi->hdr_len;
621 			u16 num_buf = 1;
622 
623 			xdp_headroom = virtnet_get_headroom(vi);
624 			header_offset = VIRTNET_RX_PAD + xdp_headroom;
625 			headroom = vi->hdr_len + header_offset;
626 			buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
627 				 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
628 			xdp_page = xdp_linearize_page(rq, &num_buf, page,
629 						      offset, header_offset,
630 						      &tlen);
631 			if (!xdp_page)
632 				goto err_xdp;
633 
634 			buf = page_address(xdp_page);
635 			put_page(page);
636 			page = xdp_page;
637 		}
638 
639 		xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
640 		xdp.data = xdp.data_hard_start + xdp_headroom;
641 		xdp_set_data_meta_invalid(&xdp);
642 		xdp.data_end = xdp.data + len;
643 		xdp.rxq = &rq->xdp_rxq;
644 		orig_data = xdp.data;
645 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
646 
647 		switch (act) {
648 		case XDP_PASS:
649 			/* Recalculate length in case bpf program changed it */
650 			delta = orig_data - xdp.data;
651 			len = xdp.data_end - xdp.data;
652 			break;
653 		case XDP_TX:
654 			xdpf = convert_to_xdp_frame(&xdp);
655 			if (unlikely(!xdpf))
656 				goto err_xdp;
657 			err = __virtnet_xdp_tx_xmit(vi, xdpf);
658 			if (unlikely(err)) {
659 				trace_xdp_exception(vi->dev, xdp_prog, act);
660 				goto err_xdp;
661 			}
662 			*xdp_xmit = true;
663 			rcu_read_unlock();
664 			goto xdp_xmit;
665 		case XDP_REDIRECT:
666 			err = xdp_do_redirect(dev, &xdp, xdp_prog);
667 			if (err)
668 				goto err_xdp;
669 			*xdp_xmit = true;
670 			rcu_read_unlock();
671 			goto xdp_xmit;
672 		default:
673 			bpf_warn_invalid_xdp_action(act);
674 		case XDP_ABORTED:
675 			trace_xdp_exception(vi->dev, xdp_prog, act);
676 		case XDP_DROP:
677 			goto err_xdp;
678 		}
679 	}
680 	rcu_read_unlock();
681 
682 	skb = build_skb(buf, buflen);
683 	if (!skb) {
684 		put_page(page);
685 		goto err;
686 	}
687 	skb_reserve(skb, headroom - delta);
688 	skb_put(skb, len);
689 	if (!delta) {
690 		buf += header_offset;
691 		memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
692 	} /* keep zeroed vnet hdr since packet was changed by bpf */
693 
694 err:
695 	return skb;
696 
697 err_xdp:
698 	rcu_read_unlock();
699 	dev->stats.rx_dropped++;
700 	put_page(page);
701 xdp_xmit:
702 	return NULL;
703 }
704 
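/* Receive path for "big packets": the buffer is a chain of pages which
 * page_to_skb() turns into an skb.
 */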
705 static struct sk_buff *receive_big(struct net_device *dev,
706 				   struct virtnet_info *vi,
707 				   struct receive_queue *rq,
708 				   void *buf,
709 				   unsigned int len)
710 {
711 	struct page *page = buf;
712 	struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
713 
714 	if (unlikely(!skb))
715 		goto err;
716 
717 	return skb;
718 
719 err:
720 	dev->stats.rx_dropped++;
721 	give_pages(rq, page);
722 	return NULL;
723 }
724 
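/* Receive path for mergeable rx buffers: a packet may span num_buffers
 * descriptors, which are stitched together as skb frags (or chained skbs once
 * MAX_SKB_FRAGS is reached). XDP only handles single-buffer packets, so
 * multi-buffer packets are linearized into one page first.
 */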
725 static struct sk_buff *receive_mergeable(struct net_device *dev,
726 					 struct virtnet_info *vi,
727 					 struct receive_queue *rq,
728 					 void *buf,
729 					 void *ctx,
730 					 unsigned int len,
731 					 bool *xdp_xmit)
732 {
733 	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
734 	u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
735 	struct page *page = virt_to_head_page(buf);
736 	int offset = buf - page_address(page);
737 	struct sk_buff *head_skb, *curr_skb;
738 	struct bpf_prog *xdp_prog;
739 	unsigned int truesize;
740 	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
741 	int err;
742 
743 	head_skb = NULL;
744 
745 	rcu_read_lock();
746 	xdp_prog = rcu_dereference(rq->xdp_prog);
747 	if (xdp_prog) {
748 		struct xdp_frame *xdpf;
749 		struct page *xdp_page;
750 		struct xdp_buff xdp;
751 		void *data;
752 		u32 act;
753 
754 		/* Transient failure which in theory could occur if
755 		 * in-flight packets from before XDP was enabled reach
756 		 * the receive path after XDP is loaded.
757 		 */
758 		if (unlikely(hdr->hdr.gso_type))
759 			goto err_xdp;
760 
761 		/* This happens when rx buffer size is underestimated
762 		 * or headroom is not enough because the buffer
763 		 * was refilled before XDP was set. This should only
764 		 * happen for the first several packets, so we don't
765 		 * care much about its performance.
766 		 */
767 		if (unlikely(num_buf > 1 ||
768 			     headroom < virtnet_get_headroom(vi))) {
769 			/* linearize data for XDP */
770 			xdp_page = xdp_linearize_page(rq, &num_buf,
771 						      page, offset,
772 						      VIRTIO_XDP_HEADROOM,
773 						      &len);
774 			if (!xdp_page)
775 				goto err_xdp;
776 			offset = VIRTIO_XDP_HEADROOM;
777 		} else {
778 			xdp_page = page;
779 		}
780 
781 		/* Allow consuming headroom but reserve enough space to push
782 		 * the descriptor on if we get an XDP_TX return code.
783 		 */
784 		data = page_address(xdp_page) + offset;
785 		xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
786 		xdp.data = data + vi->hdr_len;
787 		xdp_set_data_meta_invalid(&xdp);
788 		xdp.data_end = xdp.data + (len - vi->hdr_len);
789 		xdp.rxq = &rq->xdp_rxq;
790 
791 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
792 
793 		switch (act) {
794 		case XDP_PASS:
795 			/* recalculate offset to account for any header
796 			 * adjustments. Note other cases do not build an
797 			 * skb and avoid using offset
798 			 */
799 			offset = xdp.data -
800 					page_address(xdp_page) - vi->hdr_len;
801 
802 			/* recalculate len if xdp.data or xdp.data_end were
803 			 * adjusted
804 			 */
805 			len = xdp.data_end - xdp.data + vi->hdr_len;
806 			/* We can only create skb based on xdp_page. */
807 			if (unlikely(xdp_page != page)) {
808 				rcu_read_unlock();
809 				put_page(page);
810 				head_skb = page_to_skb(vi, rq, xdp_page,
811 						       offset, len, PAGE_SIZE);
812 				return head_skb;
813 			}
814 			break;
815 		case XDP_TX:
816 			xdpf = convert_to_xdp_frame(&xdp);
817 			if (unlikely(!xdpf))
818 				goto err_xdp;
819 			err = __virtnet_xdp_tx_xmit(vi, xdpf);
820 			if (unlikely(err)) {
821 				trace_xdp_exception(vi->dev, xdp_prog, act);
822 				if (unlikely(xdp_page != page))
823 					put_page(xdp_page);
824 				goto err_xdp;
825 			}
826 			*xdp_xmit = true;
827 			if (unlikely(xdp_page != page))
828 				put_page(page);
829 			rcu_read_unlock();
830 			goto xdp_xmit;
831 		case XDP_REDIRECT:
832 			err = xdp_do_redirect(dev, &xdp, xdp_prog);
833 			if (err) {
834 				if (unlikely(xdp_page != page))
835 					put_page(xdp_page);
836 				goto err_xdp;
837 			}
838 			*xdp_xmit = true;
839 			if (unlikely(xdp_page != page))
840 				put_page(page);
841 			rcu_read_unlock();
842 			goto xdp_xmit;
843 		default:
844 			bpf_warn_invalid_xdp_action(act);
845 		case XDP_ABORTED:
846 			trace_xdp_exception(vi->dev, xdp_prog, act);
847 		case XDP_DROP:
848 			if (unlikely(xdp_page != page))
849 				__free_pages(xdp_page, 0);
850 			goto err_xdp;
851 		}
852 	}
853 	rcu_read_unlock();
854 
855 	truesize = mergeable_ctx_to_truesize(ctx);
856 	if (unlikely(len > truesize)) {
857 		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
858 			 dev->name, len, (unsigned long)ctx);
859 		dev->stats.rx_length_errors++;
860 		goto err_skb;
861 	}
862 
863 	head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
864 	curr_skb = head_skb;
865 
866 	if (unlikely(!curr_skb))
867 		goto err_skb;
868 	while (--num_buf) {
869 		int num_skb_frags;
870 
871 		buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
872 		if (unlikely(!buf)) {
873 			pr_debug("%s: rx error: %d buffers out of %d missing\n",
874 				 dev->name, num_buf,
875 				 virtio16_to_cpu(vi->vdev,
876 						 hdr->num_buffers));
877 			dev->stats.rx_length_errors++;
878 			goto err_buf;
879 		}
880 
881 		page = virt_to_head_page(buf);
882 
883 		truesize = mergeable_ctx_to_truesize(ctx);
884 		if (unlikely(len > truesize)) {
885 			pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
886 				 dev->name, len, (unsigned long)ctx);
887 			dev->stats.rx_length_errors++;
888 			goto err_skb;
889 		}
890 
891 		num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
892 		if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
893 			struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
894 
895 			if (unlikely(!nskb))
896 				goto err_skb;
897 			if (curr_skb == head_skb)
898 				skb_shinfo(curr_skb)->frag_list = nskb;
899 			else
900 				curr_skb->next = nskb;
901 			curr_skb = nskb;
902 			head_skb->truesize += nskb->truesize;
903 			num_skb_frags = 0;
904 		}
905 		if (curr_skb != head_skb) {
906 			head_skb->data_len += len;
907 			head_skb->len += len;
908 			head_skb->truesize += truesize;
909 		}
910 		offset = buf - page_address(page);
911 		if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
912 			put_page(page);
913 			skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
914 					     len, truesize);
915 		} else {
916 			skb_add_rx_frag(curr_skb, num_skb_frags, page,
917 					offset, len, truesize);
918 		}
919 	}
920 
921 	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
922 	return head_skb;
923 
924 err_xdp:
925 	rcu_read_unlock();
926 err_skb:
927 	put_page(page);
928 	while (num_buf-- > 1) {
929 		buf = virtqueue_get_buf(rq->vq, &len);
930 		if (unlikely(!buf)) {
931 			pr_debug("%s: rx error: %d buffers missing\n",
932 				 dev->name, num_buf);
933 			dev->stats.rx_length_errors++;
934 			break;
935 		}
936 		page = virt_to_head_page(buf);
937 		put_page(page);
938 	}
939 err_buf:
940 	dev->stats.rx_dropped++;
941 	dev_kfree_skb(head_skb);
942 xdp_xmit:
943 	return NULL;
944 }
945 
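/* Dispatch a completed rx buffer to the mergeable/big/small receive path and
 * hand the resulting skb to GRO. Returns the number of bytes received, or 0
 * if the buffer was dropped.
 */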
946 static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
947 		       void *buf, unsigned int len, void **ctx, bool *xdp_xmit)
948 {
949 	struct net_device *dev = vi->dev;
950 	struct sk_buff *skb;
951 	struct virtio_net_hdr_mrg_rxbuf *hdr;
952 	int ret;
953 
954 	if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
955 		pr_debug("%s: short packet %i\n", dev->name, len);
956 		dev->stats.rx_length_errors++;
957 		if (vi->mergeable_rx_bufs) {
958 			put_page(virt_to_head_page(buf));
959 		} else if (vi->big_packets) {
960 			give_pages(rq, buf);
961 		} else {
962 			put_page(virt_to_head_page(buf));
963 		}
964 		return 0;
965 	}
966 
967 	if (vi->mergeable_rx_bufs)
968 		skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit);
969 	else if (vi->big_packets)
970 		skb = receive_big(dev, vi, rq, buf, len);
971 	else
972 		skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit);
973 
974 	if (unlikely(!skb))
975 		return 0;
976 
977 	hdr = skb_vnet_hdr(skb);
978 
979 	ret = skb->len;
980 
981 	if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
982 		skb->ip_summed = CHECKSUM_UNNECESSARY;
983 
984 	if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
985 				  virtio_is_little_endian(vi->vdev))) {
986 		net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
987 				     dev->name, hdr->hdr.gso_type,
988 				     hdr->hdr.gso_size);
989 		goto frame_err;
990 	}
991 
992 	skb->protocol = eth_type_trans(skb, dev);
993 	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
994 		 ntohs(skb->protocol), skb->len, skb->pkt_type);
995 
996 	napi_gro_receive(&rq->napi, skb);
997 	return ret;
998 
999 frame_err:
1000 	dev->stats.rx_frame_errors++;
1001 	dev_kfree_skb(skb);
1002 	return 0;
1003 }
1004 
1005 /* Unlike mergeable buffers, all buffers are allocated with the
1006  * same size, except for the headroom. For this reason we do
1007  * not need to use mergeable_len_to_ctx here - it is enough
1008  * to store the headroom as the context ignoring the truesize.
1009  */
1010 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
1011 			     gfp_t gfp)
1012 {
1013 	struct page_frag *alloc_frag = &rq->alloc_frag;
1014 	char *buf;
1015 	unsigned int xdp_headroom = virtnet_get_headroom(vi);
1016 	void *ctx = (void *)(unsigned long)xdp_headroom;
1017 	int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
1018 	int err;
1019 
1020 	len = SKB_DATA_ALIGN(len) +
1021 	      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1022 	if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
1023 		return -ENOMEM;
1024 
1025 	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
1026 	get_page(alloc_frag->page);
1027 	alloc_frag->offset += len;
1028 	sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom,
1029 		    vi->hdr_len + GOOD_PACKET_LEN);
1030 	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
1031 	if (err < 0)
1032 		put_page(virt_to_head_page(buf));
1033 	return err;
1034 }
1035 
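/* Post a page chain for a "big packets" rx buffer: sg[0] holds the virtio
 * header, sg[1] the rest of the first page, and the remaining entries one
 * full page each, linked together through page->private.
 */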
1036 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
1037 			   gfp_t gfp)
1038 {
1039 	struct page *first, *list = NULL;
1040 	char *p;
1041 	int i, err, offset;
1042 
1043 	sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
1044 
1045 	/* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
1046 	for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
1047 		first = get_a_page(rq, gfp);
1048 		if (!first) {
1049 			if (list)
1050 				give_pages(rq, list);
1051 			return -ENOMEM;
1052 		}
1053 		sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
1054 
1055 		/* chain new page in list head to match sg */
1056 		first->private = (unsigned long)list;
1057 		list = first;
1058 	}
1059 
1060 	first = get_a_page(rq, gfp);
1061 	if (!first) {
1062 		give_pages(rq, list);
1063 		return -ENOMEM;
1064 	}
1065 	p = page_address(first);
1066 
1067 	/* rq->sg[0], rq->sg[1] share the same page */
1068 	/* a separate rq->sg[0] for the header - required in case !any_header_sg */
1069 	sg_set_buf(&rq->sg[0], p, vi->hdr_len);
1070 
1071 	/* rq->sg[1] for data packet, from offset */
1072 	offset = sizeof(struct padded_vnet_hdr);
1073 	sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
1074 
1075 	/* chain first in list head */
1076 	first->private = (unsigned long)list;
1077 	err = virtqueue_add_inbuf(rq->vq, rq->sg, MAX_SKB_FRAGS + 2,
1078 				  first, gfp);
1079 	if (err < 0)
1080 		give_pages(rq, first);
1081 
1082 	return err;
1083 }
1084 
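/* Choose the rx buffer length to post: the EWMA of recent packet sizes,
 * clamped to [min_buf_len, PAGE_SIZE - hdr_len] and cache-line aligned; when
 * XDP headroom/tailroom is reserved, use a whole page minus that room.
 */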
1085 static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
1086 					  struct ewma_pkt_len *avg_pkt_len,
1087 					  unsigned int room)
1088 {
1089 	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1090 	unsigned int len;
1091 
1092 	if (room)
1093 		return PAGE_SIZE - room;
1094 
1095 	len = hdr_len +	clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
1096 				rq->min_buf_len, PAGE_SIZE - hdr_len);
1097 
1098 	return ALIGN(len, L1_CACHE_BYTES);
1099 }
1100 
1101 static int add_recvbuf_mergeable(struct virtnet_info *vi,
1102 				 struct receive_queue *rq, gfp_t gfp)
1103 {
1104 	struct page_frag *alloc_frag = &rq->alloc_frag;
1105 	unsigned int headroom = virtnet_get_headroom(vi);
1106 	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
1107 	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
1108 	char *buf;
1109 	void *ctx;
1110 	int err;
1111 	unsigned int len, hole;
1112 
1113 	/* Extra tailroom is needed to satisfy XDP's assumption. This
1114 	 * means rx frags coalescing won't work, but since we've
1115 	 * disabled GSO for XDP, it won't be a big issue.
1116 	 */
1117 	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
1118 	if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
1119 		return -ENOMEM;
1120 
1121 	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
1122 	buf += headroom; /* advance address leaving hole at front of pkt */
1123 	get_page(alloc_frag->page);
1124 	alloc_frag->offset += len + room;
1125 	hole = alloc_frag->size - alloc_frag->offset;
1126 	if (hole < len + room) {
1127 		/* To avoid internal fragmentation, if there is very likely not
1128 		 * enough space for another buffer, add the remaining space to
1129 		 * the current buffer.
1130 		 */
1131 		len += hole;
1132 		alloc_frag->offset += hole;
1133 	}
1134 
1135 	sg_init_one(rq->sg, buf, len);
1136 	ctx = mergeable_len_to_ctx(len, headroom);
1137 	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
1138 	if (err < 0)
1139 		put_page(virt_to_head_page(buf));
1140 
1141 	return err;
1142 }
1143 
1144 /*
1145  * Returns false if we couldn't fill entirely (OOM).
1146  *
1147  * Normally run in the receive path, but can also be run from ndo_open
1148  * before we're receiving packets, or from refill_work which is
1149  * careful to disable receiving (using napi_disable).
1150  */
1151 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
1152 			  gfp_t gfp)
1153 {
1154 	int err;
1155 	bool oom;
1156 
1157 	do {
1158 		if (vi->mergeable_rx_bufs)
1159 			err = add_recvbuf_mergeable(vi, rq, gfp);
1160 		else if (vi->big_packets)
1161 			err = add_recvbuf_big(vi, rq, gfp);
1162 		else
1163 			err = add_recvbuf_small(vi, rq, gfp);
1164 
1165 		oom = err == -ENOMEM;
1166 		if (err)
1167 			break;
1168 	} while (rq->vq->num_free);
1169 	virtqueue_kick(rq->vq);
1170 	return !oom;
1171 }
1172 
1173 static void skb_recv_done(struct virtqueue *rvq)
1174 {
1175 	struct virtnet_info *vi = rvq->vdev->priv;
1176 	struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
1177 
1178 	virtqueue_napi_schedule(&rq->napi, rvq);
1179 }
1180 
1181 static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
1182 {
1183 	napi_enable(napi);
1184 
1185 	/* If all buffers were filled by the other side before we enabled napi, we
1186 	 * won't get another interrupt, so process any outstanding packets now.
1187 	 * Call local_bh_enable after to trigger softIRQ processing.
1188 	 */
1189 	local_bh_disable();
1190 	virtqueue_napi_schedule(napi, vq);
1191 	local_bh_enable();
1192 }
1193 
1194 static void virtnet_napi_tx_enable(struct virtnet_info *vi,
1195 				   struct virtqueue *vq,
1196 				   struct napi_struct *napi)
1197 {
1198 	if (!napi->weight)
1199 		return;
1200 
1201 	/* Tx napi touches cachelines on the cpu handling tx interrupts. Only
1202 	 * enable the feature if this is likely affine with the transmit path.
1203 	 */
1204 	if (!vi->affinity_hint_set) {
1205 		napi->weight = 0;
1206 		return;
1207 	}
1208 
1209 	return virtnet_napi_enable(vq, napi);
1210 }
1211 
1212 static void virtnet_napi_tx_disable(struct napi_struct *napi)
1213 {
1214 	if (napi->weight)
1215 		napi_disable(napi);
1216 }
1217 
1218 static void refill_work(struct work_struct *work)
1219 {
1220 	struct virtnet_info *vi =
1221 		container_of(work, struct virtnet_info, refill.work);
1222 	bool still_empty;
1223 	int i;
1224 
1225 	for (i = 0; i < vi->curr_queue_pairs; i++) {
1226 		struct receive_queue *rq = &vi->rq[i];
1227 
1228 		napi_disable(&rq->napi);
1229 		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
1230 		virtnet_napi_enable(rq->vq, &rq->napi);
1231 
1232 		/* In theory, this can happen: if we don't get any buffers in,
1233 		 * we will *never* try to fill again.
1234 		 */
1235 		if (still_empty)
1236 			schedule_delayed_work(&vi->refill, HZ/2);
1237 	}
1238 }
1239 
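/* Poll up to budget completed rx buffers, refill the ring once more than
 * half of it is free (deferring to the refill worker on OOM), and update the
 * per-queue stats.
 */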
1240 static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit)
1241 {
1242 	struct virtnet_info *vi = rq->vq->vdev->priv;
1243 	unsigned int len, received = 0, bytes = 0;
1244 	void *buf;
1245 
1246 	if (!vi->big_packets || vi->mergeable_rx_bufs) {
1247 		void *ctx;
1248 
1249 		while (received < budget &&
1250 		       (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
1251 			bytes += receive_buf(vi, rq, buf, len, ctx, xdp_xmit);
1252 			received++;
1253 		}
1254 	} else {
1255 		while (received < budget &&
1256 		       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
1257 			bytes += receive_buf(vi, rq, buf, len, NULL, xdp_xmit);
1258 			received++;
1259 		}
1260 	}
1261 
1262 	if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
1263 		if (!try_fill_recv(vi, rq, GFP_ATOMIC))
1264 			schedule_delayed_work(&vi->refill, 0);
1265 	}
1266 
1267 	u64_stats_update_begin(&rq->stats.syncp);
1268 	rq->stats.bytes += bytes;
1269 	rq->stats.packets += received;
1270 	u64_stats_update_end(&rq->stats.syncp);
1271 
1272 	return received;
1273 }
1274 
1275 static void free_old_xmit_skbs(struct send_queue *sq)
1276 {
1277 	struct sk_buff *skb;
1278 	unsigned int len;
1279 	unsigned int packets = 0;
1280 	unsigned int bytes = 0;
1281 
1282 	while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
1283 		pr_debug("Sent skb %p\n", skb);
1284 
1285 		bytes += skb->len;
1286 		packets++;
1287 
1288 		dev_consume_skb_any(skb);
1289 	}
1290 
1291 	/* Avoid overhead when no packets have been processed; this
1292 	 * happens when called speculatively from start_xmit.
1293 	 */
1294 	if (!packets)
1295 		return;
1296 
1297 	u64_stats_update_begin(&sq->stats.syncp);
1298 	sq->stats.bytes += bytes;
1299 	sq->stats.packets += packets;
1300 	u64_stats_update_end(&sq->stats.syncp);
1301 }
1302 
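/* Opportunistically reclaim completed tx buffers for the send queue sharing
 * this rx queue's index; only done when tx napi is in use.
 */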
1303 static void virtnet_poll_cleantx(struct receive_queue *rq)
1304 {
1305 	struct virtnet_info *vi = rq->vq->vdev->priv;
1306 	unsigned int index = vq2rxq(rq->vq);
1307 	struct send_queue *sq = &vi->sq[index];
1308 	struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
1309 
1310 	if (!sq->napi.weight)
1311 		return;
1312 
1313 	if (__netif_tx_trylock(txq)) {
1314 		free_old_xmit_skbs(sq);
1315 		__netif_tx_unlock(txq);
1316 	}
1317 
1318 	if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
1319 		netif_tx_wake_queue(txq);
1320 }
1321 
1322 static int virtnet_poll(struct napi_struct *napi, int budget)
1323 {
1324 	struct receive_queue *rq =
1325 		container_of(napi, struct receive_queue, napi);
1326 	struct virtnet_info *vi = rq->vq->vdev->priv;
1327 	struct send_queue *sq;
1328 	unsigned int received, qp;
1329 	bool xdp_xmit = false;
1330 
1331 	virtnet_poll_cleantx(rq);
1332 
1333 	received = virtnet_receive(rq, budget, &xdp_xmit);
1334 
1335 	/* Out of packets? */
1336 	if (received < budget)
1337 		virtqueue_napi_complete(napi, rq->vq, received);
1338 
1339 	if (xdp_xmit) {
1340 		qp = vi->curr_queue_pairs - vi->xdp_queue_pairs +
1341 		     smp_processor_id();
1342 		sq = &vi->sq[qp];
1343 		virtqueue_kick(sq->vq);
1344 		xdp_do_flush_map();
1345 	}
1346 
1347 	return received;
1348 }
1349 
1350 static int virtnet_open(struct net_device *dev)
1351 {
1352 	struct virtnet_info *vi = netdev_priv(dev);
1353 	int i, err;
1354 
1355 	for (i = 0; i < vi->max_queue_pairs; i++) {
1356 		if (i < vi->curr_queue_pairs)
1357 			/* Make sure we have some buffers: if oom use wq. */
1358 			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
1359 				schedule_delayed_work(&vi->refill, 0);
1360 
1361 		err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i);
1362 		if (err < 0)
1363 			return err;
1364 
1365 		err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq,
1366 						 MEM_TYPE_PAGE_SHARED, NULL);
1367 		if (err < 0) {
1368 			xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
1369 			return err;
1370 		}
1371 
1372 		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
1373 		virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
1374 	}
1375 
1376 	return 0;
1377 }
1378 
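/* Tx NAPI handler: reclaim completed tx buffers under the tx queue lock and
 * wake the queue once enough descriptors are free for a worst-case skb.
 */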
1379 static int virtnet_poll_tx(struct napi_struct *napi, int budget)
1380 {
1381 	struct send_queue *sq = container_of(napi, struct send_queue, napi);
1382 	struct virtnet_info *vi = sq->vq->vdev->priv;
1383 	struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq));
1384 
1385 	__netif_tx_lock(txq, raw_smp_processor_id());
1386 	free_old_xmit_skbs(sq);
1387 	__netif_tx_unlock(txq);
1388 
1389 	virtqueue_napi_complete(napi, sq->vq, 0);
1390 
1391 	if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
1392 		netif_tx_wake_queue(txq);
1393 
1394 	return 0;
1395 }
1396 
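/* Build the virtio-net header for an outgoing skb and add it to the send
 * virtqueue, pushing the header into the skb's headroom when any_header_sg
 * allows it, otherwise keeping it in skb->cb.
 */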
1397 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
1398 {
1399 	struct virtio_net_hdr_mrg_rxbuf *hdr;
1400 	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
1401 	struct virtnet_info *vi = sq->vq->vdev->priv;
1402 	int num_sg;
1403 	unsigned hdr_len = vi->hdr_len;
1404 	bool can_push;
1405 
1406 	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
1407 
1408 	can_push = vi->any_header_sg &&
1409 		!((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
1410 		!skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
1411 	/* Even if we can, don't push here yet as this would skew
1412 	 * csum_start offset below. */
1413 	if (can_push)
1414 		hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
1415 	else
1416 		hdr = skb_vnet_hdr(skb);
1417 
1418 	if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
1419 				    virtio_is_little_endian(vi->vdev), false))
1420 		BUG();
1421 
1422 	if (vi->mergeable_rx_bufs)
1423 		hdr->num_buffers = 0;
1424 
1425 	sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
1426 	if (can_push) {
1427 		__skb_push(skb, hdr_len);
1428 		num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
1429 		if (unlikely(num_sg < 0))
1430 			return num_sg;
1431 		/* Pull header back to avoid skew in tx bytes calculations. */
1432 		__skb_pull(skb, hdr_len);
1433 	} else {
1434 		sg_set_buf(sq->sg, hdr, hdr_len);
1435 		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
1436 		if (unlikely(num_sg < 0))
1437 			return num_sg;
1438 		num_sg++;
1439 	}
1440 	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
1441 }
1442 
1443 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
1444 {
1445 	struct virtnet_info *vi = netdev_priv(dev);
1446 	int qnum = skb_get_queue_mapping(skb);
1447 	struct send_queue *sq = &vi->sq[qnum];
1448 	int err;
1449 	struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
1450 	bool kick = !skb->xmit_more;
1451 	bool use_napi = sq->napi.weight;
1452 
1453 	/* Free up any pending old buffers before queueing new ones. */
1454 	free_old_xmit_skbs(sq);
1455 
1456 	if (use_napi && kick)
1457 		virtqueue_enable_cb_delayed(sq->vq);
1458 
1459 	/* timestamp packet in software */
1460 	skb_tx_timestamp(skb);
1461 
1462 	/* Try to transmit */
1463 	err = xmit_skb(sq, skb);
1464 
1465 	/* This should not happen! */
1466 	if (unlikely(err)) {
1467 		dev->stats.tx_fifo_errors++;
1468 		if (net_ratelimit())
1469 			dev_warn(&dev->dev,
1470 				 "Unexpected TXQ (%d) queue failure: %d\n", qnum, err);
1471 		dev->stats.tx_dropped++;
1472 		dev_kfree_skb_any(skb);
1473 		return NETDEV_TX_OK;
1474 	}
1475 
1476 	/* Don't wait up for transmitted skbs to be freed. */
1477 	if (!use_napi) {
1478 		skb_orphan(skb);
1479 		nf_reset(skb);
1480 	}
1481 
1482 	/* If running out of space, stop queue to avoid getting packets that we
1483 	 * are then unable to transmit.
1484 	 * An alternative would be to force queuing layer to requeue the skb by
1485 	 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
1486 	 * returned in a normal path of operation: it means that driver is not
1487 	 * maintaining the TX queue stop/start state properly, and causes
1488 	 * the stack to do a non-trivial amount of useless work.
1489 	 * Since most packets only take 1 or 2 ring slots, stopping the queue
1490 	 * early means 16 slots are typically wasted.
1491 	 */
1492 	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
1493 		netif_stop_subqueue(dev, qnum);
1494 		if (!use_napi &&
1495 		    unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
1496 			/* More just got used, free them then recheck. */
1497 			free_old_xmit_skbs(sq);
1498 			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
1499 				netif_start_subqueue(dev, qnum);
1500 				virtqueue_disable_cb(sq->vq);
1501 			}
1502 		}
1503 	}
1504 
1505 	if (kick || netif_xmit_stopped(txq))
1506 		virtqueue_kick(sq->vq);
1507 
1508 	return NETDEV_TX_OK;
1509 }
1510 
1511 /*
1512  * Send command via the control virtqueue and check status.  Commands
1513  * supported by the hypervisor, as indicated by feature bits, should
1514  * never fail unless improperly formatted.
1515  */
1516 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
1517 				 struct scatterlist *out)
1518 {
1519 	struct scatterlist *sgs[4], hdr, stat;
1520 	unsigned out_num = 0, tmp;
1521 
1522 	/* Caller should know better */
1523 	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
1524 
1525 	vi->ctrl->status = ~0;
1526 	vi->ctrl->hdr.class = class;
1527 	vi->ctrl->hdr.cmd = cmd;
1528 	/* Add header */
1529 	sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
1530 	sgs[out_num++] = &hdr;
1531 
1532 	if (out)
1533 		sgs[out_num++] = out;
1534 
1535 	/* Add return status. */
1536 	sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
1537 	sgs[out_num] = &stat;
1538 
1539 	BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
1540 	virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
1541 
1542 	if (unlikely(!virtqueue_kick(vi->cvq)))
1543 		return vi->ctrl->status == VIRTIO_NET_OK;
1544 
1545 	/* Spin for a response, the kick causes an ioport write, trapping
1546 	 * into the hypervisor, so the request should be handled immediately.
1547 	 */
1548 	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
1549 	       !virtqueue_is_broken(vi->cvq))
1550 		cpu_relax();
1551 
1552 	return vi->ctrl->status == VIRTIO_NET_OK;
1553 }
1554 
1555 static int virtnet_set_mac_address(struct net_device *dev, void *p)
1556 {
1557 	struct virtnet_info *vi = netdev_priv(dev);
1558 	struct virtio_device *vdev = vi->vdev;
1559 	int ret;
1560 	struct sockaddr *addr;
1561 	struct scatterlist sg;
1562 
1563 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
1564 		return -EOPNOTSUPP;
1565 
1566 	addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);
1567 	if (!addr)
1568 		return -ENOMEM;
1569 
1570 	ret = eth_prepare_mac_addr_change(dev, addr);
1571 	if (ret)
1572 		goto out;
1573 
1574 	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1575 		sg_init_one(&sg, addr->sa_data, dev->addr_len);
1576 		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
1577 					  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
1578 			dev_warn(&vdev->dev,
1579 				 "Failed to set mac address by vq command.\n");
1580 			ret = -EINVAL;
1581 			goto out;
1582 		}
1583 	} else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
1584 		   !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1585 		unsigned int i;
1586 
1587 		/* Naturally, this has an atomicity problem. */
1588 		for (i = 0; i < dev->addr_len; i++)
1589 			virtio_cwrite8(vdev,
1590 				       offsetof(struct virtio_net_config, mac) +
1591 				       i, addr->sa_data[i]);
1592 	}
1593 
1594 	eth_commit_mac_addr_change(dev, p);
1595 	ret = 0;
1596 
1597 out:
1598 	kfree(addr);
1599 	return ret;
1600 }
1601 
1602 static void virtnet_stats(struct net_device *dev,
1603 			  struct rtnl_link_stats64 *tot)
1604 {
1605 	struct virtnet_info *vi = netdev_priv(dev);
1606 	unsigned int start;
1607 	int i;
1608 
1609 	for (i = 0; i < vi->max_queue_pairs; i++) {
1610 		u64 tpackets, tbytes, rpackets, rbytes;
1611 		struct receive_queue *rq = &vi->rq[i];
1612 		struct send_queue *sq = &vi->sq[i];
1613 
1614 		do {
1615 			start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
1616 			tpackets = sq->stats.packets;
1617 			tbytes   = sq->stats.bytes;
1618 		} while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));
1619 
1620 		do {
1621 			start = u64_stats_fetch_begin_irq(&rq->stats.syncp);
1622 			rpackets = rq->stats.packets;
1623 			rbytes   = rq->stats.bytes;
1624 		} while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start));
1625 
1626 		tot->rx_packets += rpackets;
1627 		tot->tx_packets += tpackets;
1628 		tot->rx_bytes   += rbytes;
1629 		tot->tx_bytes   += tbytes;
1630 	}
1631 
1632 	tot->tx_dropped = dev->stats.tx_dropped;
1633 	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
1634 	tot->rx_dropped = dev->stats.rx_dropped;
1635 	tot->rx_length_errors = dev->stats.rx_length_errors;
1636 	tot->rx_frame_errors = dev->stats.rx_frame_errors;
1637 }
1638 
1639 #ifdef CONFIG_NET_POLL_CONTROLLER
1640 static void virtnet_netpoll(struct net_device *dev)
1641 {
1642 	struct virtnet_info *vi = netdev_priv(dev);
1643 	int i;
1644 
1645 	for (i = 0; i < vi->curr_queue_pairs; i++)
1646 		napi_schedule(&vi->rq[i].napi);
1647 }
1648 #endif
1649 
1650 static void virtnet_ack_link_announce(struct virtnet_info *vi)
1651 {
1652 	rtnl_lock();
1653 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
1654 				  VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
1655 		dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
1656 	rtnl_unlock();
1657 }
1658 
1659 static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
1660 {
1661 	struct scatterlist sg;
1662 	struct net_device *dev = vi->dev;
1663 
1664 	if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
1665 		return 0;
1666 
1667 	vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
1668 	sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq));
1669 
1670 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
1671 				  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
1672 		dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
1673 			 queue_pairs);
1674 		return -EINVAL;
1675 	} else {
1676 		vi->curr_queue_pairs = queue_pairs;
1677 		/* virtnet_open() will refill when the device is brought up. */
1678 		if (dev->flags & IFF_UP)
1679 			schedule_delayed_work(&vi->refill, 0);
1680 	}
1681 
1682 	return 0;
1683 }
1684 
1685 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
1686 {
1687 	int err;
1688 
1689 	rtnl_lock();
1690 	err = _virtnet_set_queues(vi, queue_pairs);
1691 	rtnl_unlock();
1692 	return err;
1693 }
1694 
1695 static int virtnet_close(struct net_device *dev)
1696 {
1697 	struct virtnet_info *vi = netdev_priv(dev);
1698 	int i;
1699 
1700 	/* Make sure refill_work doesn't re-enable napi! */
1701 	cancel_delayed_work_sync(&vi->refill);
1702 
1703 	for (i = 0; i < vi->max_queue_pairs; i++) {
1704 		xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
1705 		napi_disable(&vi->rq[i].napi);
1706 		virtnet_napi_tx_disable(&vi->sq[i].napi);
1707 	}
1708 
1709 	return 0;
1710 }
1711 
1712 static void virtnet_set_rx_mode(struct net_device *dev)
1713 {
1714 	struct virtnet_info *vi = netdev_priv(dev);
1715 	struct scatterlist sg[2];
1716 	struct virtio_net_ctrl_mac *mac_data;
1717 	struct netdev_hw_addr *ha;
1718 	int uc_count;
1719 	int mc_count;
1720 	void *buf;
1721 	int i;
1722 
1723 	/* We can't dynamically set ndo_set_rx_mode, so return gracefully */
1724 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
1725 		return;
1726 
1727 	vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
1728 	vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
1729 
1730 	sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc));
1731 
1732 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
1733 				  VIRTIO_NET_CTRL_RX_PROMISC, sg))
1734 		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
1735 			 vi->ctrl->promisc ? "en" : "dis");
1736 
1737 	sg_init_one(sg, &vi->ctrl->allmulti, sizeof(vi->ctrl->allmulti));
1738 
1739 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
1740 				  VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
1741 		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
1742 			 vi->ctrl->allmulti ? "en" : "dis");
1743 
1744 	uc_count = netdev_uc_count(dev);
1745 	mc_count = netdev_mc_count(dev);
1746 	/* MAC filter - use one buffer for both lists */
1747 	buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
1748 		      (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
1749 	mac_data = buf;
1750 	if (!buf)
1751 		return;
1752 
1753 	sg_init_table(sg, 2);
1754 
1755 	/* Store the unicast list and count in the front of the buffer */
1756 	mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count);
1757 	i = 0;
1758 	netdev_for_each_uc_addr(ha, dev)
1759 		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
1760 
1761 	sg_set_buf(&sg[0], mac_data,
1762 		   sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
1763 
1764 	/* multicast list and count fill the end */
1765 	mac_data = (void *)&mac_data->macs[uc_count][0];
1766 
1767 	mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count);
1768 	i = 0;
1769 	netdev_for_each_mc_addr(ha, dev)
1770 		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
1771 
1772 	sg_set_buf(&sg[1], mac_data,
1773 		   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
1774 
1775 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
1776 				  VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
1777 		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
1778 
1779 	kfree(buf);
1780 }
1781 
1782 static int virtnet_vlan_rx_add_vid(struct net_device *dev,
1783 				   __be16 proto, u16 vid)
1784 {
1785 	struct virtnet_info *vi = netdev_priv(dev);
1786 	struct scatterlist sg;
1787 
1788 	vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid);
1789 	sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
1790 
1791 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
1792 				  VIRTIO_NET_CTRL_VLAN_ADD, &sg))
1793 		dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
1794 	return 0;
1795 }
1796 
1797 static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
1798 				    __be16 proto, u16 vid)
1799 {
1800 	struct virtnet_info *vi = netdev_priv(dev);
1801 	struct scatterlist sg;
1802 
1803 	vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid);
1804 	sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
1805 
1806 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
1807 				  VIRTIO_NET_CTRL_VLAN_DEL, &sg))
1808 		dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
1809 	return 0;
1810 }
1811 
1812 static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
1813 {
1814 	int i;
1815 
1816 	if (vi->affinity_hint_set) {
1817 		for (i = 0; i < vi->max_queue_pairs; i++) {
1818 			virtqueue_set_affinity(vi->rq[i].vq, -1);
1819 			virtqueue_set_affinity(vi->sq[i].vq, -1);
1820 		}
1821 
1822 		vi->affinity_hint_set = false;
1823 	}
1824 }
1825 
1826 static void virtnet_set_affinity(struct virtnet_info *vi)
1827 {
1828 	int i;
1829 	int cpu;
1830 
1831 	/* In multiqueue mode, when the number of cpus is equal to the number of
1832 	 * queue pairs, we let each queue pair be private to one cpu by
1833 	 * setting the affinity hint to eliminate the contention.
1834 	 */
1835 	if (vi->curr_queue_pairs == 1 ||
1836 	    vi->max_queue_pairs != num_online_cpus()) {
1837 		virtnet_clean_affinity(vi, -1);
1838 		return;
1839 	}
1840 
1841 	i = 0;
1842 	for_each_online_cpu(cpu) {
1843 		virtqueue_set_affinity(vi->rq[i].vq, cpu);
1844 		virtqueue_set_affinity(vi->sq[i].vq, cpu);
1845 		netif_set_xps_queue(vi->dev, cpumask_of(cpu), i);
1846 		i++;
1847 	}
1848 
1849 	vi->affinity_hint_set = true;
1850 }
1851 
1852 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
1853 {
1854 	struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
1855 						   node);
1856 	virtnet_set_affinity(vi);
1857 	return 0;
1858 }
1859 
1860 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
1861 {
1862 	struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
1863 						   node_dead);
1864 	virtnet_set_affinity(vi);
1865 	return 0;
1866 }
1867 
1868 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
1869 {
1870 	struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
1871 						   node);
1872 
1873 	virtnet_clean_affinity(vi, cpu);
1874 	return 0;
1875 }
1876 
1877 static enum cpuhp_state virtionet_online;
1878 
1879 static int virtnet_cpu_notif_add(struct virtnet_info *vi)
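/* Register this device with the two CPU hotplug states; if the second
 * registration fails, the first one is rolled back before returning.
 */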
1880 {
1881 	int ret;
1882 
1883 	ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node);
1884 	if (ret)
1885 		return ret;
1886 	ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD,
1887 					       &vi->node_dead);
1888 	if (!ret)
1889 		return ret;
1890 	cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
1891 	return ret;
1892 }
1893 
1894 static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
1895 {
1896 	cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
1897 	cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD,
1898 					    &vi->node_dead);
1899 }
1900 
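/* ethtool ring parameters: the vring size is fixed at setup time, so the
 * current and maximum ring sizes reported are always identical.
 */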
1901 static void virtnet_get_ringparam(struct net_device *dev,
1902 				struct ethtool_ringparam *ring)
1903 {
1904 	struct virtnet_info *vi = netdev_priv(dev);
1905 
1906 	ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
1907 	ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
1908 	ring->rx_pending = ring->rx_max_pending;
1909 	ring->tx_pending = ring->tx_max_pending;
1910 }
1911 
1912 
1913 static void virtnet_get_drvinfo(struct net_device *dev,
1914 				struct ethtool_drvinfo *info)
1915 {
1916 	struct virtnet_info *vi = netdev_priv(dev);
1917 	struct virtio_device *vdev = vi->vdev;
1918 
1919 	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
1920 	strlcpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
1921 	strlcpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
1922 
1923 }
1924 
1925 /* TODO: Eliminate OOO packets during switching */
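/* Change the number of active queue pairs, e.g. via "ethtool -L <dev>
 * combined N".  Only combined channels are supported, and the change is
 * refused while an XDP program is attached.
 */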
1926 static int virtnet_set_channels(struct net_device *dev,
1927 				struct ethtool_channels *channels)
1928 {
1929 	struct virtnet_info *vi = netdev_priv(dev);
1930 	u16 queue_pairs = channels->combined_count;
1931 	int err;
1932 
1933 	/* We don't support separate rx/tx channels.
1934 	 * We don't allow setting 'other' channels.
1935 	 */
1936 	if (channels->rx_count || channels->tx_count || channels->other_count)
1937 		return -EINVAL;
1938 
1939 	if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
1940 		return -EINVAL;
1941 
1942 	/* For now we don't support modifying channels while XDP is loaded.
1943 	 * Also, when XDP is loaded all RX queues have XDP programs, so we
1944 	 * only need to check a single RX queue.
1945 	 */
1946 	if (vi->rq[0].xdp_prog)
1947 		return -EINVAL;
1948 
1949 	get_online_cpus();
1950 	err = _virtnet_set_queues(vi, queue_pairs);
1951 	if (!err) {
1952 		netif_set_real_num_tx_queues(dev, queue_pairs);
1953 		netif_set_real_num_rx_queues(dev, queue_pairs);
1954 
1955 		virtnet_set_affinity(vi);
1956 	}
1957 	put_online_cpus();
1958 
1959 	return err;
1960 }
1961 
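/* Per-queue ethtool statistics ("ethtool -S <dev>"): strings are named
 * rx_queue_<i>_<stat> / tx_queue_<i>_<stat>, and the values are read under
 * the u64_stats sequence counter.
 */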
1962 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
1963 {
1964 	struct virtnet_info *vi = netdev_priv(dev);
1965 	char *p = (char *)data;
1966 	unsigned int i, j;
1967 
1968 	switch (stringset) {
1969 	case ETH_SS_STATS:
1970 		for (i = 0; i < vi->curr_queue_pairs; i++) {
1971 			for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
1972 				snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_%s",
1973 					 i, virtnet_rq_stats_desc[j].desc);
1974 				p += ETH_GSTRING_LEN;
1975 			}
1976 		}
1977 
1978 		for (i = 0; i < vi->curr_queue_pairs; i++) {
1979 			for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
1980 				snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_%s",
1981 					 i, virtnet_sq_stats_desc[j].desc);
1982 				p += ETH_GSTRING_LEN;
1983 			}
1984 		}
1985 		break;
1986 	}
1987 }
1988 
1989 static int virtnet_get_sset_count(struct net_device *dev, int sset)
1990 {
1991 	struct virtnet_info *vi = netdev_priv(dev);
1992 
1993 	switch (sset) {
1994 	case ETH_SS_STATS:
1995 		return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN +
1996 					       VIRTNET_SQ_STATS_LEN);
1997 	default:
1998 		return -EOPNOTSUPP;
1999 	}
2000 }
2001 
2002 static void virtnet_get_ethtool_stats(struct net_device *dev,
2003 				      struct ethtool_stats *stats, u64 *data)
2004 {
2005 	struct virtnet_info *vi = netdev_priv(dev);
2006 	unsigned int idx = 0, start, i, j;
2007 	const u8 *stats_base;
2008 	size_t offset;
2009 
2010 	for (i = 0; i < vi->curr_queue_pairs; i++) {
2011 		struct receive_queue *rq = &vi->rq[i];
2012 
2013 		stats_base = (u8 *)&rq->stats;
2014 		do {
2015 			start = u64_stats_fetch_begin_irq(&rq->stats.syncp);
2016 			for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
2017 				offset = virtnet_rq_stats_desc[j].offset;
2018 				data[idx + j] = *(u64 *)(stats_base + offset);
2019 			}
2020 		} while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start));
2021 		idx += VIRTNET_RQ_STATS_LEN;
2022 	}
2023 
2024 	for (i = 0; i < vi->curr_queue_pairs; i++) {
2025 		struct send_queue *sq = &vi->sq[i];
2026 
2027 		stats_base = (u8 *)&sq->stats;
2028 		do {
2029 			start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
2030 			for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
2031 				offset = virtnet_sq_stats_desc[j].offset;
2032 				data[idx + j] = *(u64 *)(stats_base + offset);
2033 			}
2034 		} while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));
2035 		idx += VIRTNET_SQ_STATS_LEN;
2036 	}
2037 }
2038 
2039 static void virtnet_get_channels(struct net_device *dev,
2040 				 struct ethtool_channels *channels)
2041 {
2042 	struct virtnet_info *vi = netdev_priv(dev);
2043 
2044 	channels->combined_count = vi->curr_queue_pairs;
2045 	channels->max_combined = vi->max_queue_pairs;
2046 	channels->max_other = 0;
2047 	channels->rx_count = 0;
2048 	channels->tx_count = 0;
2049 	channels->other_count = 0;
2050 }
2051 
2052 /* Check if the user is trying to change anything besides speed/duplex */
2053 static bool
2054 virtnet_validate_ethtool_cmd(const struct ethtool_link_ksettings *cmd)
2055 {
2056 	struct ethtool_link_ksettings diff1 = *cmd;
2057 	struct ethtool_link_ksettings diff2 = {};
2058 
2059 	/* cmd is always set, so we need to clear it; validate the port type,
2060 	 * and since there is no autonegotiation we can ignore advertising.
2061 	 */
2062 	diff1.base.speed = 0;
2063 	diff2.base.port = PORT_OTHER;
2064 	ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
2065 	diff1.base.duplex = 0;
2066 	diff1.base.cmd = 0;
2067 	diff1.base.link_mode_masks_nwords = 0;
2068 
2069 	return !memcmp(&diff1.base, &diff2.base, sizeof(diff1.base)) &&
2070 		bitmap_empty(diff1.link_modes.supported,
2071 			     __ETHTOOL_LINK_MODE_MASK_NBITS) &&
2072 		bitmap_empty(diff1.link_modes.advertising,
2073 			     __ETHTOOL_LINK_MODE_MASK_NBITS) &&
2074 		bitmap_empty(diff1.link_modes.lp_advertising,
2075 			     __ETHTOOL_LINK_MODE_MASK_NBITS);
2076 }
2077 
2078 static int virtnet_set_link_ksettings(struct net_device *dev,
2079 				      const struct ethtool_link_ksettings *cmd)
2080 {
2081 	struct virtnet_info *vi = netdev_priv(dev);
2082 	u32 speed;
2083 
2084 	speed = cmd->base.speed;
2085 	/* don't allow custom speed and duplex */
2086 	if (!ethtool_validate_speed(speed) ||
2087 	    !ethtool_validate_duplex(cmd->base.duplex) ||
2088 	    !virtnet_validate_ethtool_cmd(cmd))
2089 		return -EINVAL;
2090 	vi->speed = speed;
2091 	vi->duplex = cmd->base.duplex;
2092 
2093 	return 0;
2094 }
2095 
2096 static int virtnet_get_link_ksettings(struct net_device *dev,
2097 				      struct ethtool_link_ksettings *cmd)
2098 {
2099 	struct virtnet_info *vi = netdev_priv(dev);
2100 
2101 	cmd->base.speed = vi->speed;
2102 	cmd->base.duplex = vi->duplex;
2103 	cmd->base.port = PORT_OTHER;
2104 
2105 	return 0;
2106 }
2107 
2108 static void virtnet_init_settings(struct net_device *dev)
2109 {
2110 	struct virtnet_info *vi = netdev_priv(dev);
2111 
2112 	vi->speed = SPEED_UNKNOWN;
2113 	vi->duplex = DUPLEX_UNKNOWN;
2114 }
2115 
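/* Refresh speed/duplex from config space when VIRTIO_NET_F_SPEED_DUPLEX is
 * negotiated; values the device reports as invalid leave the current
 * settings untouched.
 */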
2116 static void virtnet_update_settings(struct virtnet_info *vi)
2117 {
2118 	u32 speed;
2119 	u8 duplex;
2120 
2121 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX))
2122 		return;
2123 
2124 	speed = virtio_cread32(vi->vdev, offsetof(struct virtio_net_config,
2125 						  speed));
2126 	if (ethtool_validate_speed(speed))
2127 		vi->speed = speed;
2128 	duplex = virtio_cread8(vi->vdev, offsetof(struct virtio_net_config,
2129 						  duplex));
2130 	if (ethtool_validate_duplex(duplex))
2131 		vi->duplex = duplex;
2132 }
2133 
2134 static const struct ethtool_ops virtnet_ethtool_ops = {
2135 	.get_drvinfo = virtnet_get_drvinfo,
2136 	.get_link = ethtool_op_get_link,
2137 	.get_ringparam = virtnet_get_ringparam,
2138 	.get_strings = virtnet_get_strings,
2139 	.get_sset_count = virtnet_get_sset_count,
2140 	.get_ethtool_stats = virtnet_get_ethtool_stats,
2141 	.set_channels = virtnet_set_channels,
2142 	.get_channels = virtnet_get_channels,
2143 	.get_ts_info = ethtool_op_get_ts_info,
2144 	.get_link_ksettings = virtnet_get_link_ksettings,
2145 	.set_link_ksettings = virtnet_set_link_ksettings,
2146 };
2147 
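/* Quiesce the device before freeze: make sure the config and refill work
 * are idle, detach the netdev and disable NAPI so nothing touches the
 * virtqueues afterwards.
 */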
2148 static void virtnet_freeze_down(struct virtio_device *vdev)
2149 {
2150 	struct virtnet_info *vi = vdev->priv;
2151 	int i;
2152 
2153 	/* Make sure no work handler is accessing the device */
2154 	flush_work(&vi->config_work);
2155 
2156 	netif_device_detach(vi->dev);
2157 	netif_tx_disable(vi->dev);
2158 	cancel_delayed_work_sync(&vi->refill);
2159 
2160 	if (netif_running(vi->dev)) {
2161 		for (i = 0; i < vi->max_queue_pairs; i++) {
2162 			napi_disable(&vi->rq[i].napi);
2163 			virtnet_napi_tx_disable(&vi->sq[i].napi);
2164 		}
2165 	}
2166 }
2167 
2168 static int init_vqs(struct virtnet_info *vi);
2169 
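/* Mirror of virtnet_freeze_down(): re-create the virtqueues, refill the RX
 * rings, re-enable NAPI and re-attach the netdev.
 */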
2170 static int virtnet_restore_up(struct virtio_device *vdev)
2171 {
2172 	struct virtnet_info *vi = vdev->priv;
2173 	int err, i;
2174 
2175 	err = init_vqs(vi);
2176 	if (err)
2177 		return err;
2178 
2179 	virtio_device_ready(vdev);
2180 
2181 	if (netif_running(vi->dev)) {
2182 		for (i = 0; i < vi->curr_queue_pairs; i++)
2183 			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
2184 				schedule_delayed_work(&vi->refill, 0);
2185 
2186 		for (i = 0; i < vi->max_queue_pairs; i++) {
2187 			virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
2188 			virtnet_napi_tx_enable(vi, vi->sq[i].vq,
2189 					       &vi->sq[i].napi);
2190 		}
2191 	}
2192 
2193 	netif_device_attach(vi->dev);
2194 	return err;
2195 }
2196 
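/* Tell the device which guest offload feature bits should currently be
 * enabled, via VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET on the control virtqueue.
 */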
2197 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
2198 {
2199 	struct scatterlist sg;
2200 	vi->ctrl->offloads = cpu_to_virtio64(vi->vdev, offloads);
2201 
2202 	sg_init_one(&sg, &vi->ctrl->offloads, sizeof(vi->ctrl->offloads));
2203 
2204 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
2205 				  VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
2206 		dev_warn(&vi->dev->dev, "Failed to set guest offloads.\n");
2207 		return -EINVAL;
2208 	}
2209 
2210 	return 0;
2211 }
2212 
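/* XDP programs cannot cope with GSO'd receive buffers, so all guest
 * offloads except checksum are turned off while a program is attached and
 * restored once it is removed.
 */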
2213 static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
2214 {
2215 	u64 offloads = 0;
2216 
2217 	if (!vi->guest_offloads)
2218 		return 0;
2219 
2220 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
2221 		offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM;
2222 
2223 	return virtnet_set_guest_offloads(vi, offloads);
2224 }
2225 
2226 static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
2227 {
2228 	u64 offloads = vi->guest_offloads;
2229 
2230 	if (!vi->guest_offloads)
2231 		return 0;
2232 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
2233 		offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM;
2234 
2235 	return virtnet_set_guest_offloads(vi, offloads);
2236 }
2237 
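/* Attach or detach an XDP program: checks the offload/MTU constraints,
 * reserves nr_cpu_ids extra queue pairs for XDP_TX, and then swaps the
 * program pointer on every RX queue under RCU.
 */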
2238 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
2239 			   struct netlink_ext_ack *extack)
2240 {
2241 	unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
2242 	struct virtnet_info *vi = netdev_priv(dev);
2243 	struct bpf_prog *old_prog;
2244 	u16 xdp_qp = 0, curr_qp;
2245 	int i, err;
2246 
2247 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
2248 	    && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
2249 	        virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
2250 	        virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
2251 		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO))) {
2252 		NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first");
2253 		return -EOPNOTSUPP;
2254 	}
2255 
2256 	if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
2257 		NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
2258 		return -EINVAL;
2259 	}
2260 
2261 	if (dev->mtu > max_sz) {
2262 		NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
2263 		netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
2264 		return -EINVAL;
2265 	}
2266 
2267 	curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
2268 	if (prog)
2269 		xdp_qp = nr_cpu_ids;
2270 
2271 	/* XDP requires extra queues for XDP_TX */
2272 	if (curr_qp + xdp_qp > vi->max_queue_pairs) {
2273 		NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
2274 		netdev_warn(dev, "request %i queues but max is %i\n",
2275 			    curr_qp + xdp_qp, vi->max_queue_pairs);
2276 		return -ENOMEM;
2277 	}
2278 
2279 	if (prog) {
2280 		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
2281 		if (IS_ERR(prog))
2282 			return PTR_ERR(prog);
2283 	}
2284 
2285 	/* Make sure NAPI is not using any XDP TX queues for RX. */
2286 	if (netif_running(dev))
2287 		for (i = 0; i < vi->max_queue_pairs; i++)
2288 			napi_disable(&vi->rq[i].napi);
2289 
2290 	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
2291 	err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
2292 	if (err)
2293 		goto err;
2294 	vi->xdp_queue_pairs = xdp_qp;
2295 
2296 	for (i = 0; i < vi->max_queue_pairs; i++) {
2297 		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
2298 		rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
2299 		if (i == 0) {
2300 			if (!old_prog)
2301 				virtnet_clear_guest_offloads(vi);
2302 			if (!prog)
2303 				virtnet_restore_guest_offloads(vi);
2304 		}
2305 		if (old_prog)
2306 			bpf_prog_put(old_prog);
2307 		if (netif_running(dev))
2308 			virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
2309 	}
2310 
2311 	return 0;
2312 
2313 err:
2314 	for (i = 0; i < vi->max_queue_pairs; i++)
2315 		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
2316 	if (prog)
2317 		bpf_prog_sub(prog, vi->max_queue_pairs - 1);
2318 	return err;
2319 }
2320 
2321 static u32 virtnet_xdp_query(struct net_device *dev)
2322 {
2323 	struct virtnet_info *vi = netdev_priv(dev);
2324 	const struct bpf_prog *xdp_prog;
2325 	int i;
2326 
2327 	for (i = 0; i < vi->max_queue_pairs; i++) {
2328 		xdp_prog = rtnl_dereference(vi->rq[i].xdp_prog);
2329 		if (xdp_prog)
2330 			return xdp_prog->aux->id;
2331 	}
2332 	return 0;
2333 }
2334 
2335 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
2336 {
2337 	switch (xdp->command) {
2338 	case XDP_SETUP_PROG:
2339 		return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
2340 	case XDP_QUERY_PROG:
2341 		xdp->prog_id = virtnet_xdp_query(dev);
2342 		xdp->prog_attached = !!xdp->prog_id;
2343 		return 0;
2344 	default:
2345 		return -EINVAL;
2346 	}
2347 }
2348 
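/* With VIRTIO_NET_F_STANDBY the device is the standby leg of a failover
 * pair; report the fixed phys port name "sby" used to identify the standby
 * interface.
 */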
2349 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf,
2350 				      size_t len)
2351 {
2352 	struct virtnet_info *vi = netdev_priv(dev);
2353 	int ret;
2354 
2355 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
2356 		return -EOPNOTSUPP;
2357 
2358 	ret = snprintf(buf, len, "sby");
2359 	if (ret >= len)
2360 		return -EOPNOTSUPP;
2361 
2362 	return 0;
2363 }
2364 
2365 static const struct net_device_ops virtnet_netdev = {
2366 	.ndo_open            = virtnet_open,
2367 	.ndo_stop   	     = virtnet_close,
2368 	.ndo_start_xmit      = start_xmit,
2369 	.ndo_validate_addr   = eth_validate_addr,
2370 	.ndo_set_mac_address = virtnet_set_mac_address,
2371 	.ndo_set_rx_mode     = virtnet_set_rx_mode,
2372 	.ndo_get_stats64     = virtnet_stats,
2373 	.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
2374 	.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
2375 #ifdef CONFIG_NET_POLL_CONTROLLER
2376 	.ndo_poll_controller = virtnet_netpoll,
2377 #endif
2378 	.ndo_bpf		= virtnet_xdp,
2379 	.ndo_xdp_xmit		= virtnet_xdp_xmit,
2380 	.ndo_xdp_flush		= virtnet_xdp_flush,
2381 	.ndo_features_check	= passthru_features_check,
2382 	.ndo_get_phys_port_name	= virtnet_get_phys_port_name,
2383 };
2384 
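/* Handle a configuration-space change: acknowledge link announcements and
 * propagate link up/down to the networking core.
 */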
2385 static void virtnet_config_changed_work(struct work_struct *work)
2386 {
2387 	struct virtnet_info *vi =
2388 		container_of(work, struct virtnet_info, config_work);
2389 	u16 v;
2390 
2391 	if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
2392 				 struct virtio_net_config, status, &v) < 0)
2393 		return;
2394 
2395 	if (v & VIRTIO_NET_S_ANNOUNCE) {
2396 		netdev_notify_peers(vi->dev);
2397 		virtnet_ack_link_announce(vi);
2398 	}
2399 
2400 	/* Ignore unknown (future) status bits */
2401 	v &= VIRTIO_NET_S_LINK_UP;
2402 
2403 	if (vi->status == v)
2404 		return;
2405 
2406 	vi->status = v;
2407 
2408 	if (vi->status & VIRTIO_NET_S_LINK_UP) {
2409 		virtnet_update_settings(vi);
2410 		netif_carrier_on(vi->dev);
2411 		netif_tx_wake_all_queues(vi->dev);
2412 	} else {
2413 		netif_carrier_off(vi->dev);
2414 		netif_tx_stop_all_queues(vi->dev);
2415 	}
2416 }
2417 
2418 static void virtnet_config_changed(struct virtio_device *vdev)
2419 {
2420 	struct virtnet_info *vi = vdev->priv;
2421 
2422 	schedule_work(&vi->config_work);
2423 }
2424 
2425 static void virtnet_free_queues(struct virtnet_info *vi)
2426 {
2427 	int i;
2428 
2429 	for (i = 0; i < vi->max_queue_pairs; i++) {
2430 		napi_hash_del(&vi->rq[i].napi);
2431 		netif_napi_del(&vi->rq[i].napi);
2432 		netif_napi_del(&vi->sq[i].napi);
2433 	}
2434 
2435 	/* We called napi_hash_del() before netif_napi_del(),
2436 	 * so we need to respect an RCU grace period before freeing vi->rq.
2437 	 */
2438 	synchronize_net();
2439 
2440 	kfree(vi->rq);
2441 	kfree(vi->sq);
2442 	kfree(vi->ctrl);
2443 }
2444 
2445 static void _free_receive_bufs(struct virtnet_info *vi)
2446 {
2447 	struct bpf_prog *old_prog;
2448 	int i;
2449 
2450 	for (i = 0; i < vi->max_queue_pairs; i++) {
2451 		while (vi->rq[i].pages)
2452 			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
2453 
2454 		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
2455 		RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
2456 		if (old_prog)
2457 			bpf_prog_put(old_prog);
2458 	}
2459 }
2460 
2461 static void free_receive_bufs(struct virtnet_info *vi)
2462 {
2463 	rtnl_lock();
2464 	_free_receive_bufs(vi);
2465 	rtnl_unlock();
2466 }
2467 
2468 static void free_receive_page_frags(struct virtnet_info *vi)
2469 {
2470 	int i;
2471 	for (i = 0; i < vi->max_queue_pairs; i++)
2472 		if (vi->rq[i].alloc_frag.page)
2473 			put_page(vi->rq[i].alloc_frag.page);
2474 }
2475 
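/* The last xdp_queue_pairs TX queues (up to curr_queue_pairs) carry raw XDP
 * buffers rather than skbs, so unused buffers on them must be freed as
 * pages.
 */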
2476 static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
2477 {
2478 	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
2479 		return false;
2480 	else if (q < vi->curr_queue_pairs)
2481 		return true;
2482 	else
2483 		return false;
2484 }
2485 
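/* Detach and free any buffers still queued in the virtqueues: TX buffers
 * are skbs (or raw pages on XDP queues), RX buffers are freed according to
 * the receive mode (mergeable, big packets or small buffers).
 */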
2486 static void free_unused_bufs(struct virtnet_info *vi)
2487 {
2488 	void *buf;
2489 	int i;
2490 
2491 	for (i = 0; i < vi->max_queue_pairs; i++) {
2492 		struct virtqueue *vq = vi->sq[i].vq;
2493 		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
2494 			if (!is_xdp_raw_buffer_queue(vi, i))
2495 				dev_kfree_skb(buf);
2496 			else
2497 				put_page(virt_to_head_page(buf));
2498 		}
2499 	}
2500 
2501 	for (i = 0; i < vi->max_queue_pairs; i++) {
2502 		struct virtqueue *vq = vi->rq[i].vq;
2503 
2504 		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
2505 			if (vi->mergeable_rx_bufs) {
2506 				put_page(virt_to_head_page(buf));
2507 			} else if (vi->big_packets) {
2508 				give_pages(&vi->rq[i], buf);
2509 			} else {
2510 				put_page(virt_to_head_page(buf));
2511 			}
2512 		}
2513 	}
2514 }
2515 
2516 static void virtnet_del_vqs(struct virtnet_info *vi)
2517 {
2518 	struct virtio_device *vdev = vi->vdev;
2519 
2520 	virtnet_clean_affinity(vi, -1);
2521 
2522 	vdev->config->del_vqs(vdev);
2523 
2524 	virtnet_free_queues(vi);
2525 }
2526 
2527 /* How large should a single buffer be so a queue full of these can fit at
2528  * least one full packet?
2529  * Logic below assumes the mergeable buffer header is used.
2530  */
2531 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
2532 {
2533 	const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2534 	unsigned int rq_size = virtqueue_get_vring_size(vq);
2535 	unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
2536 	unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
2537 	unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
2538 
2539 	return max(max(min_buf_len, hdr_len) - hdr_len,
2540 		   (unsigned int)GOOD_PACKET_LEN);
2541 }
2542 
2543 static int virtnet_find_vqs(struct virtnet_info *vi)
2544 {
2545 	vq_callback_t **callbacks;
2546 	struct virtqueue **vqs;
2547 	int ret = -ENOMEM;
2548 	int i, total_vqs;
2549 	const char **names;
2550 	bool *ctx;
2551 
2552 	/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
2553 	 * the possible N-1 RX/TX queue pairs used in multiqueue mode, followed
2554 	 * by an optional control vq.
2555 	 */
2556 	total_vqs = vi->max_queue_pairs * 2 +
2557 		    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
2558 
2559 	/* Allocate space for find_vqs parameters */
2560 	vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
2561 	if (!vqs)
2562 		goto err_vq;
2563 	callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
2564 	if (!callbacks)
2565 		goto err_callback;
2566 	names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
2567 	if (!names)
2568 		goto err_names;
2569 	if (!vi->big_packets || vi->mergeable_rx_bufs) {
2570 		ctx = kzalloc(total_vqs * sizeof(*ctx), GFP_KERNEL);
2571 		if (!ctx)
2572 			goto err_ctx;
2573 	} else {
2574 		ctx = NULL;
2575 	}
2576 
2577 	/* Parameters for control virtqueue, if any */
2578 	if (vi->has_cvq) {
2579 		callbacks[total_vqs - 1] = NULL;
2580 		names[total_vqs - 1] = "control";
2581 	}
2582 
2583 	/* Allocate/initialize parameters for send/receive virtqueues */
2584 	for (i = 0; i < vi->max_queue_pairs; i++) {
2585 		callbacks[rxq2vq(i)] = skb_recv_done;
2586 		callbacks[txq2vq(i)] = skb_xmit_done;
2587 		sprintf(vi->rq[i].name, "input.%d", i);
2588 		sprintf(vi->sq[i].name, "output.%d", i);
2589 		names[rxq2vq(i)] = vi->rq[i].name;
2590 		names[txq2vq(i)] = vi->sq[i].name;
2591 		if (ctx)
2592 			ctx[rxq2vq(i)] = true;
2593 	}
2594 
2595 	ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
2596 					 names, ctx, NULL);
2597 	if (ret)
2598 		goto err_find;
2599 
2600 	if (vi->has_cvq) {
2601 		vi->cvq = vqs[total_vqs - 1];
2602 		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
2603 			vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
2604 	}
2605 
2606 	for (i = 0; i < vi->max_queue_pairs; i++) {
2607 		vi->rq[i].vq = vqs[rxq2vq(i)];
2608 		vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
2609 		vi->sq[i].vq = vqs[txq2vq(i)];
2610 	}
2611 
2612 	kfree(names);
2613 	kfree(callbacks);
2614 	kfree(vqs);
2615 	kfree(ctx);
2616 
2617 	return 0;
2618 
2619 err_find:
2620 	kfree(ctx);
2621 err_ctx:
2622 	kfree(names);
2623 err_names:
2624 	kfree(callbacks);
2625 err_callback:
2626 	kfree(vqs);
2627 err_vq:
2628 	return ret;
2629 }
2630 
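/* Allocate the control buffer and the send/receive queue arrays, then set
 * up NAPI, the scatterlists and the per-queue statistics for each pair.
 */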
2631 static int virtnet_alloc_queues(struct virtnet_info *vi)
2632 {
2633 	int i;
2634 
2635 	vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
2636 	if (!vi->ctrl)
2637 		goto err_ctrl;
2638 	vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL);
2639 	if (!vi->sq)
2640 		goto err_sq;
2641 	vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL);
2642 	if (!vi->rq)
2643 		goto err_rq;
2644 
2645 	INIT_DELAYED_WORK(&vi->refill, refill_work);
2646 	for (i = 0; i < vi->max_queue_pairs; i++) {
2647 		vi->rq[i].pages = NULL;
2648 		netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
2649 			       napi_weight);
2650 		netif_tx_napi_add(vi->dev, &vi->sq[i].napi, virtnet_poll_tx,
2651 				  napi_tx ? napi_weight : 0);
2652 
2653 		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
2654 		ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
2655 		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
2656 
2657 		u64_stats_init(&vi->rq[i].stats.syncp);
2658 		u64_stats_init(&vi->sq[i].stats.syncp);
2659 	}
2660 
2661 	return 0;
2662 
2663 err_rq:
2664 	kfree(vi->sq);
2665 err_sq:
2666 	kfree(vi->ctrl);
2667 err_ctrl:
2668 	return -ENOMEM;
2669 }
2670 
2671 static int init_vqs(struct virtnet_info *vi)
2672 {
2673 	int ret;
2674 
2675 	/* Allocate send & receive queues */
2676 	ret = virtnet_alloc_queues(vi);
2677 	if (ret)
2678 		goto err;
2679 
2680 	ret = virtnet_find_vqs(vi);
2681 	if (ret)
2682 		goto err_free;
2683 
2684 	get_online_cpus();
2685 	virtnet_set_affinity(vi);
2686 	put_online_cpus();
2687 
2688 	return 0;
2689 
2690 err_free:
2691 	virtnet_free_queues(vi);
2692 err:
2693 	return ret;
2694 }
2695 
2696 #ifdef CONFIG_SYSFS
2697 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
2698 		char *buf)
2699 {
2700 	struct virtnet_info *vi = netdev_priv(queue->dev);
2701 	unsigned int queue_index = get_netdev_rx_queue_index(queue);
2702 	unsigned int headroom = virtnet_get_headroom(vi);
2703 	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
2704 	struct ewma_pkt_len *avg;
2705 
2706 	BUG_ON(queue_index >= vi->max_queue_pairs);
2707 	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
2708 	return sprintf(buf, "%u\n",
2709 		       get_mergeable_buf_len(&vi->rq[queue_index], avg,
2710 				       SKB_DATA_ALIGN(headroom + tailroom)));
2711 }
2712 
2713 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
2714 	__ATTR_RO(mergeable_rx_buffer_size);
2715 
2716 static struct attribute *virtio_net_mrg_rx_attrs[] = {
2717 	&mergeable_rx_buffer_size_attribute.attr,
2718 	NULL
2719 };
2720 
2721 static const struct attribute_group virtio_net_mrg_rx_group = {
2722 	.name = "virtio_net",
2723 	.attrs = virtio_net_mrg_rx_attrs
2724 };
2725 #endif
2726 
2727 static bool virtnet_fail_on_feature(struct virtio_device *vdev,
2728 				    unsigned int fbit,
2729 				    const char *fname, const char *dname)
2730 {
2731 	if (!virtio_has_feature(vdev, fbit))
2732 		return false;
2733 
2734 	dev_err(&vdev->dev, "device advertises feature %s but not %s",
2735 		fname, dname);
2736 
2737 	return true;
2738 }
2739 
2740 #define VIRTNET_FAIL_ON(vdev, fbit, dbit)			\
2741 	virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)
2742 
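/* Reject feature combinations that need the control virtqueue when
 * VIRTIO_NET_F_CTRL_VQ itself has not been offered by the device.
 */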
2743 static bool virtnet_validate_features(struct virtio_device *vdev)
2744 {
2745 	if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
2746 	    (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
2747 			     "VIRTIO_NET_F_CTRL_VQ") ||
2748 	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
2749 			     "VIRTIO_NET_F_CTRL_VQ") ||
2750 	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
2751 			     "VIRTIO_NET_F_CTRL_VQ") ||
2752 	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
2753 	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
2754 			     "VIRTIO_NET_F_CTRL_VQ"))) {
2755 		return false;
2756 	}
2757 
2758 	return true;
2759 }
2760 
2761 #define MIN_MTU ETH_MIN_MTU
2762 #define MAX_MTU ETH_MAX_MTU
2763 
2764 static int virtnet_validate(struct virtio_device *vdev)
2765 {
2766 	if (!vdev->config->get) {
2767 		dev_err(&vdev->dev, "%s failure: config access disabled\n",
2768 			__func__);
2769 		return -EINVAL;
2770 	}
2771 
2772 	if (!virtnet_validate_features(vdev))
2773 		return -EINVAL;
2774 
2775 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
2776 		int mtu = virtio_cread16(vdev,
2777 					 offsetof(struct virtio_net_config,
2778 						  mtu));
2779 		if (mtu < MIN_MTU)
2780 			__virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
2781 	}
2782 
2783 	return 0;
2784 }
2785 
2786 static int virtnet_probe(struct virtio_device *vdev)
2787 {
2788 	int i, err = -ENOMEM;
2789 	struct net_device *dev;
2790 	struct virtnet_info *vi;
2791 	u16 max_queue_pairs;
2792 	int mtu;
2793 
2794 	/* Find out whether the host supports a multiqueue virtio_net device */
2795 	err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
2796 				   struct virtio_net_config,
2797 				   max_virtqueue_pairs, &max_queue_pairs);
2798 
2799 	/* We need at least 2 queues */
2800 	if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
2801 	    max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
2802 	    !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
2803 		max_queue_pairs = 1;
2804 
2805 	/* Allocate ourselves a network device with room for our info */
2806 	dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
2807 	if (!dev)
2808 		return -ENOMEM;
2809 
2810 	/* Set up network device as normal. */
2811 	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
2812 	dev->netdev_ops = &virtnet_netdev;
2813 	dev->features = NETIF_F_HIGHDMA;
2814 
2815 	dev->ethtool_ops = &virtnet_ethtool_ops;
2816 	SET_NETDEV_DEV(dev, &vdev->dev);
2817 
2818 	/* Do we support "hardware" checksums? */
2819 	if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
2820 		/* This opens up the world of extra features. */
2821 		dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
2822 		if (csum)
2823 			dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
2824 
2825 		if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
2826 			dev->hw_features |= NETIF_F_TSO
2827 				| NETIF_F_TSO_ECN | NETIF_F_TSO6;
2828 		}
2829 		/* Individual feature bits: what can host handle? */
2830 		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
2831 			dev->hw_features |= NETIF_F_TSO;
2832 		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
2833 			dev->hw_features |= NETIF_F_TSO6;
2834 		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
2835 			dev->hw_features |= NETIF_F_TSO_ECN;
2836 
2837 		dev->features |= NETIF_F_GSO_ROBUST;
2838 
2839 		if (gso)
2840 			dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
2841 		/* (!csum && gso) case will be fixed by register_netdev() */
2842 	}
2843 	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
2844 		dev->features |= NETIF_F_RXCSUM;
2845 
2846 	dev->vlan_features = dev->features;
2847 
2848 	/* MTU range: 68 - 65535 */
2849 	dev->min_mtu = MIN_MTU;
2850 	dev->max_mtu = MAX_MTU;
2851 
2852 	/* Configuration may specify what MAC to use.  Otherwise random. */
2853 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
2854 		virtio_cread_bytes(vdev,
2855 				   offsetof(struct virtio_net_config, mac),
2856 				   dev->dev_addr, dev->addr_len);
2857 	else
2858 		eth_hw_addr_random(dev);
2859 
2860 	/* Set up our device-specific information */
2861 	vi = netdev_priv(dev);
2862 	vi->dev = dev;
2863 	vi->vdev = vdev;
2864 	vdev->priv = vi;
2865 
2866 	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
2867 
2868 	/* If we can receive ANY GSO packets, we must allocate large ones. */
2869 	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
2870 	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
2871 	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) ||
2872 	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO))
2873 		vi->big_packets = true;
2874 
2875 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
2876 		vi->mergeable_rx_bufs = true;
2877 
2878 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
2879 	    virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
2880 		vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2881 	else
2882 		vi->hdr_len = sizeof(struct virtio_net_hdr);
2883 
2884 	if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
2885 	    virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
2886 		vi->any_header_sg = true;
2887 
2888 	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
2889 		vi->has_cvq = true;
2890 
2891 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
2892 		mtu = virtio_cread16(vdev,
2893 				     offsetof(struct virtio_net_config,
2894 					      mtu));
2895 		if (mtu < dev->min_mtu) {
2896 			/* Should never trigger: MTU was previously validated
2897 			 * in virtnet_validate.
2898 			 */
2899 			dev_err(&vdev->dev, "device MTU appears to have changed, "
2900 				"it is now %d < %d", mtu, dev->min_mtu);
2901 			goto free;
2902 		}
2903 
2904 		dev->mtu = mtu;
2905 		dev->max_mtu = mtu;
2906 
2907 		/* TODO: size buffers correctly in this case. */
2908 		if (dev->mtu > ETH_DATA_LEN)
2909 			vi->big_packets = true;
2910 	}
2911 
2912 	if (vi->any_header_sg)
2913 		dev->needed_headroom = vi->hdr_len;
2914 
2915 	/* Enable multiqueue by default */
2916 	if (num_online_cpus() >= max_queue_pairs)
2917 		vi->curr_queue_pairs = max_queue_pairs;
2918 	else
2919 		vi->curr_queue_pairs = num_online_cpus();
2920 	vi->max_queue_pairs = max_queue_pairs;
2921 
2922 	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
2923 	err = init_vqs(vi);
2924 	if (err)
2925 		goto free;
2926 
2927 #ifdef CONFIG_SYSFS
2928 	if (vi->mergeable_rx_bufs)
2929 		dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
2930 #endif
2931 	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
2932 	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
2933 
2934 	virtnet_init_settings(dev);
2935 
2936 	if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
2937 		vi->failover = net_failover_create(vi->dev);
2938 		if (IS_ERR(vi->failover))
2939 			goto free_vqs;
2940 	}
2941 
2942 	err = register_netdev(dev);
2943 	if (err) {
2944 		pr_debug("virtio_net: registering device failed\n");
2945 		goto free_failover;
2946 	}
2947 
2948 	virtio_device_ready(vdev);
2949 
2950 	err = virtnet_cpu_notif_add(vi);
2951 	if (err) {
2952 		pr_debug("virtio_net: registering cpu notifier failed\n");
2953 		goto free_unregister_netdev;
2954 	}
2955 
2956 	virtnet_set_queues(vi, vi->curr_queue_pairs);
2957 
2958 	/* Assume link up if device can't report link status,
2959 	 * otherwise get link status from config. */
2960 	netif_carrier_off(dev);
2961 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
2962 		schedule_work(&vi->config_work);
2963 	} else {
2964 		vi->status = VIRTIO_NET_S_LINK_UP;
2965 		virtnet_update_settings(vi);
2966 		netif_carrier_on(dev);
2967 	}
2968 
2969 	for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
2970 		if (virtio_has_feature(vi->vdev, guest_offloads[i]))
2971 			set_bit(guest_offloads[i], &vi->guest_offloads);
2972 
2973 	pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
2974 		 dev->name, max_queue_pairs);
2975 
2976 	return 0;
2977 
2978 free_unregister_netdev:
2979 	vi->vdev->config->reset(vdev);
2980 
2981 	unregister_netdev(dev);
2982 free_failover:
2983 	net_failover_destroy(vi->failover);
2984 free_vqs:
2985 	cancel_delayed_work_sync(&vi->refill);
2986 	free_receive_page_frags(vi);
2987 	virtnet_del_vqs(vi);
2988 free:
2989 	free_netdev(dev);
2990 	return err;
2991 }
2992 
2993 static void remove_vq_common(struct virtnet_info *vi)
2994 {
2995 	vi->vdev->config->reset(vi->vdev);
2996 
2997 	/* Free unused buffers in both send and recv, if any. */
2998 	free_unused_bufs(vi);
2999 
3000 	free_receive_bufs(vi);
3001 
3002 	free_receive_page_frags(vi);
3003 
3004 	virtnet_del_vqs(vi);
3005 }
3006 
3007 static void virtnet_remove(struct virtio_device *vdev)
3008 {
3009 	struct virtnet_info *vi = vdev->priv;
3010 
3011 	virtnet_cpu_notif_remove(vi);
3012 
3013 	/* Make sure no work handler is accessing the device. */
3014 	flush_work(&vi->config_work);
3015 
3016 	unregister_netdev(vi->dev);
3017 
3018 	net_failover_destroy(vi->failover);
3019 
3020 	remove_vq_common(vi);
3021 
3022 	free_netdev(vi->dev);
3023 }
3024 
3025 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
3026 {
3027 	struct virtnet_info *vi = vdev->priv;
3028 
3029 	virtnet_cpu_notif_remove(vi);
3030 	virtnet_freeze_down(vdev);
3031 	remove_vq_common(vi);
3032 
3033 	return 0;
3034 }
3035 
3036 static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
3037 {
3038 	struct virtnet_info *vi = vdev->priv;
3039 	int err;
3040 
3041 	err = virtnet_restore_up(vdev);
3042 	if (err)
3043 		return err;
3044 	virtnet_set_queues(vi, vi->curr_queue_pairs);
3045 
3046 	err = virtnet_cpu_notif_add(vi);
3047 	if (err)
3048 		return err;
3049 
3050 	return 0;
3051 }
3052 
3053 static struct virtio_device_id id_table[] = {
3054 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3055 	{ 0 },
3056 };
3057 
3058 #define VIRTNET_FEATURES \
3059 	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
3060 	VIRTIO_NET_F_MAC, \
3061 	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
3062 	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
3063 	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
3064 	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
3065 	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
3066 	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
3067 	VIRTIO_NET_F_CTRL_MAC_ADDR, \
3068 	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
3069 	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY
3070 
3071 static unsigned int features[] = {
3072 	VIRTNET_FEATURES,
3073 };
3074 
3075 static unsigned int features_legacy[] = {
3076 	VIRTNET_FEATURES,
3077 	VIRTIO_NET_F_GSO,
3078 	VIRTIO_F_ANY_LAYOUT,
3079 };
3080 
3081 static struct virtio_driver virtio_net_driver = {
3082 	.feature_table = features,
3083 	.feature_table_size = ARRAY_SIZE(features),
3084 	.feature_table_legacy = features_legacy,
3085 	.feature_table_size_legacy = ARRAY_SIZE(features_legacy),
3086 	.driver.name =	KBUILD_MODNAME,
3087 	.driver.owner =	THIS_MODULE,
3088 	.id_table =	id_table,
3089 	.validate =	virtnet_validate,
3090 	.probe =	virtnet_probe,
3091 	.remove =	virtnet_remove,
3092 	.config_changed = virtnet_config_changed,
3093 #ifdef CONFIG_PM_SLEEP
3094 	.freeze =	virtnet_freeze,
3095 	.restore =	virtnet_restore,
3096 #endif
3097 };
3098 
3099 static __init int virtio_net_driver_init(void)
3100 {
3101 	int ret;
3102 
3103 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
3104 				      virtnet_cpu_online,
3105 				      virtnet_cpu_down_prep);
3106 	if (ret < 0)
3107 		goto out;
3108 	virtionet_online = ret;
3109 	ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
3110 				      NULL, virtnet_cpu_dead);
3111 	if (ret)
3112 		goto err_dead;
3113 
3114 	ret = register_virtio_driver(&virtio_net_driver);
3115 	if (ret)
3116 		goto err_virtio;
3117 	return 0;
3118 err_virtio:
3119 	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
3120 err_dead:
3121 	cpuhp_remove_multi_state(virtionet_online);
3122 out:
3123 	return ret;
3124 }
3125 module_init(virtio_net_driver_init);
3126 
3127 static __exit void virtio_net_driver_exit(void)
3128 {
3129 	unregister_virtio_driver(&virtio_net_driver);
3130 	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
3131 	cpuhp_remove_multi_state(virtionet_online);
3132 }
3133 module_exit(virtio_net_driver_exit);
3134 
3135 MODULE_DEVICE_TABLE(virtio, id_table);
3136 MODULE_DESCRIPTION("Virtio network driver");
3137 MODULE_LICENSE("GPL");
3138