/* A simple network driver using virtio.
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
#include <linux/scatterlist.h>

static int napi_weight = 128;
module_param(napi_weight, int, 0444);

static int csum = 1, gso = 1;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);

/* FIXME: MTU in config. */
#define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN)

struct virtnet_info
{
	struct virtio_device *vdev;
	struct virtqueue *rvq, *svq;
	struct net_device *dev;
	struct napi_struct napi;

	/* The skb we couldn't send because buffers were full. */
	struct sk_buff *last_xmit_skb;

	/* If we need to free in a timer, this is it. */
	struct timer_list xmit_free_timer;

	/* Number of input buffers, and max we've ever had. */
	unsigned int num, max;

	/* For cleaning up after transmission. */
	struct tasklet_struct tasklet;
	bool free_in_tasklet;

	/* I like... big packets and I cannot lie! */
	bool big_packets;

	/* Receive & send queues. */
	struct sk_buff_head recv;
	struct sk_buff_head send;

	/* Chain pages by the private ptr. */
	struct page *pages;
};

static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb)
{
	return (struct virtio_net_hdr *)skb->cb;
}

static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb)
{
	sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
}

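/* Trivial page pool: spare fragment pages are chained through
 * page->private.  give_a_page() pushes a page back onto the list and
 * get_a_page() pops one, falling back to alloc_page() when the list
 * is empty. */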
static void give_a_page(struct virtnet_info *vi, struct page *page)
{
	page->private = (unsigned long)vi->pages;
	vi->pages = page;
}

static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
{
	struct page *p = vi->pages;

	if (p)
		vi->pages = (struct page *)p->private;
	else
		p = alloc_page(gfp_mask);
	return p;
}

static void skb_xmit_done(struct virtqueue *svq)
{
	struct virtnet_info *vi = svq->vdev->priv;

	/* Suppress further interrupts. */
	svq->vq_ops->disable_cb(svq);

	/* We were probably waiting for more output buffers. */
	netif_wake_queue(vi->dev);

	/* Make sure we re-xmit last_xmit_skb: if there are no more packets
	 * queued, start_xmit won't be called. */
	tasklet_schedule(&vi->tasklet);
}

static void receive_skb(struct net_device *dev, struct sk_buff *skb,
			unsigned len)
{
	struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
	int err;

	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
		pr_debug("%s: short packet %i\n", dev->name, len);
		dev->stats.rx_length_errors++;
		goto drop;
	}
	len -= sizeof(struct virtio_net_hdr);

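	/* For packets that fit in the linear buffer, hand the unused
	 * fragment pages (attached by try_fill_recv for big packets)
	 * back to the page pool before trimming. */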
	if (len <= MAX_PACKET_LEN) {
		unsigned int i;

		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
			give_a_page(dev->priv, skb_shinfo(skb)->frags[i].page);
		skb->data_len = 0;
		skb_shinfo(skb)->nr_frags = 0;
	}

	err = pskb_trim(skb, len);
	if (err) {
		pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err);
		dev->stats.rx_dropped++;
		goto drop;
	}
	skb->truesize += skb->data_len;
	dev->stats.rx_bytes += skb->len;
	dev->stats.rx_packets++;

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		pr_debug("Needs csum!\n");
		if (!skb_partial_csum_set(skb,hdr->csum_start,hdr->csum_offset))
			goto frame_err;
	}

	skb->protocol = eth_type_trans(skb, dev);
	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
		 ntohs(skb->protocol), skb->len, skb->pkt_type);

	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		pr_debug("GSO!\n");
		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
			break;
		case VIRTIO_NET_HDR_GSO_UDP:
			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
			break;
		case VIRTIO_NET_HDR_GSO_TCPV6:
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
			break;
		default:
			if (net_ratelimit())
				printk(KERN_WARNING "%s: bad gso type %u.\n",
				       dev->name, hdr->gso_type);
			goto frame_err;
		}

		if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;

		skb_shinfo(skb)->gso_size = hdr->gso_size;
		if (skb_shinfo(skb)->gso_size == 0) {
			if (net_ratelimit())
				printk(KERN_WARNING "%s: zero gso size.\n",
				       dev->name);
			goto frame_err;
		}

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;
	}

	netif_receive_skb(skb);
	return;

frame_err:
	dev->stats.rx_frame_errors++;
drop:
	dev_kfree_skb(skb);
}

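/* Post receive buffers to the host.  Each buffer is a scatterlist of
 * the virtio_net_hdr (kept in skb->cb), the skb's linear data and,
 * when the host may send GSO frames ("big packets"), up to
 * MAX_SKB_FRAGS whole pages from the page pool. */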
static void try_fill_recv(struct virtnet_info *vi)
{
	struct sk_buff *skb;
	struct scatterlist sg[2+MAX_SKB_FRAGS];
	int num, err, i;

	sg_init_table(sg, 2+MAX_SKB_FRAGS);
	for (;;) {
		skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN);
		if (unlikely(!skb))
			break;

		skb_put(skb, MAX_PACKET_LEN);
		vnet_hdr_to_sg(sg, skb);

		if (vi->big_packets) {
			for (i = 0; i < MAX_SKB_FRAGS; i++) {
				skb_frag_t *f = &skb_shinfo(skb)->frags[i];
				f->page = get_a_page(vi, GFP_ATOMIC);
				if (!f->page)
					break;

				f->page_offset = 0;
				f->size = PAGE_SIZE;

				skb->data_len += PAGE_SIZE;
				skb->len += PAGE_SIZE;

				skb_shinfo(skb)->nr_frags++;
			}
		}

		num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
		skb_queue_head(&vi->recv, skb);

		err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb);
		if (err) {
			skb_unlink(skb, &vi->recv);
			kfree_skb(skb);
			break;
		}
		vi->num++;
	}
	if (unlikely(vi->num > vi->max))
		vi->max = vi->num;
	vi->rvq->vq_ops->kick(vi->rvq);
}

static void skb_recv_done(struct virtqueue *rvq)
{
	struct virtnet_info *vi = rvq->vdev->priv;
	/* Schedule NAPI; suppress further interrupts if successful. */
	if (netif_rx_schedule_prep(vi->dev, &vi->napi)) {
		rvq->vq_ops->disable_cb(rvq);
		__netif_rx_schedule(vi->dev, &vi->napi);
	}
}

static int virtnet_poll(struct napi_struct *napi, int budget)
{
	struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
	struct sk_buff *skb = NULL;
	unsigned int len, received = 0;

again:
	while (received < budget &&
	       (skb = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) {
		__skb_unlink(skb, &vi->recv);
		receive_skb(vi->dev, skb, len);
		vi->num--;
		received++;
	}

	/* FIXME: If we oom and completely run out of inbufs, we need
	 * to start a timer trying to fill more. */
	if (vi->num < vi->max / 2)
		try_fill_recv(vi);

	/* Out of packets? */
	if (received < budget) {
		netif_rx_complete(vi->dev, napi);
		if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq))
		    && napi_schedule_prep(napi)) {
			vi->rvq->vq_ops->disable_cb(vi->rvq);
			__netif_rx_schedule(vi->dev, napi);
			goto again;
		}
	}

	return received;
}

static void free_old_xmit_skbs(struct virtnet_info *vi)
{
	struct sk_buff *skb;
	unsigned int len;

	while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) {
		pr_debug("Sent skb %p\n", skb);
		__skb_unlink(skb, &vi->send);
		vi->dev->stats.tx_bytes += skb->len;
		vi->dev->stats.tx_packets++;
		kfree_skb(skb);
	}
}

/* If the virtio transport doesn't always notify us when all in-flight packets
 * are consumed, we fall back to using this function on a timer to free them. */
static void xmit_free(unsigned long data)
{
	struct virtnet_info *vi = (void *)data;

	netif_tx_lock(vi->dev);

	free_old_xmit_skbs(vi);

	if (!skb_queue_empty(&vi->send))
		mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10));

	netif_tx_unlock(vi->dev);
}

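/* Build the virtio_net_hdr in skb->cb from the skb's checksum and GSO
 * state, then post header + packet data to the send virtqueue.
 * Returns non-zero when the buffer can't be queued (typically a full
 * ring); the caller keeps the skb as last_xmit_skb and retries later. */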
static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
{
	int num, err;
	struct scatterlist sg[2+MAX_SKB_FRAGS];
	struct virtio_net_hdr *hdr;
	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;

	sg_init_table(sg, 2+MAX_SKB_FRAGS);

	pr_debug("%s: xmit %p " MAC_FMT "\n", vi->dev->name, skb,
		 dest[0], dest[1], dest[2],
		 dest[3], dest[4], dest[5]);

	/* Encode metadata header at front. */
	hdr = skb_vnet_hdr(skb);
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
		hdr->csum_start = skb->csum_start - skb_headroom(skb);
		hdr->csum_offset = skb->csum_offset;
	} else {
		hdr->flags = 0;
		hdr->csum_offset = hdr->csum_start = 0;
	}

	if (skb_is_gso(skb)) {
		hdr->hdr_len = skb_transport_header(skb) - skb->data;
		hdr->gso_size = skb_shinfo(skb)->gso_size;
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
			hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
		else
			BUG();
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
			hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
	} else {
		hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
		hdr->gso_size = hdr->hdr_len = 0;
	}

	vnet_hdr_to_sg(sg, skb);
	num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;

	err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
	if (!err && !vi->free_in_tasklet)
		mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10));

	return err;
}

static void xmit_tasklet(unsigned long data)
{
	struct virtnet_info *vi = (void *)data;

	netif_tx_lock_bh(vi->dev);
	if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) == 0) {
		vi->svq->vq_ops->kick(vi->svq);
		vi->last_xmit_skb = NULL;
	}
	if (vi->free_in_tasklet)
		free_old_xmit_skbs(vi);
	netif_tx_unlock_bh(vi->dev);
}

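/* Transmit path: reap completed skbs, retry any stashed last_xmit_skb,
 * then queue the new skb.  If the ring fills we stop the queue and
 * re-enable the "buffers used" callback, re-checking afterwards so a
 * completion that raced with us isn't missed. */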
static int start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

again:
	/* Free up any pending old buffers before queueing new ones. */
	free_old_xmit_skbs(vi);

	/* If we have a buffer left over from last time, send it now. */
	if (unlikely(vi->last_xmit_skb) &&
	    xmit_skb(vi, vi->last_xmit_skb) != 0)
		goto stop_queue;

	vi->last_xmit_skb = NULL;

	/* Put new one in send queue and do transmit */
	if (likely(skb)) {
		__skb_queue_head(&vi->send, skb);
		if (xmit_skb(vi, skb) != 0) {
			vi->last_xmit_skb = skb;
			skb = NULL;
			goto stop_queue;
		}
	}
done:
	vi->svq->vq_ops->kick(vi->svq);
	return NETDEV_TX_OK;

stop_queue:
	pr_debug("%s: virtio not prepared to send\n", dev->name);
	netif_stop_queue(dev);

	/* Activate callback for using skbs: if this returns false it
	 * means some were used in the meantime. */
	if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
		vi->svq->vq_ops->disable_cb(vi->svq);
		netif_start_queue(dev);
		goto again;
	}
	if (skb) {
		/* Drop this skb: we only queue one. */
		vi->dev->stats.tx_dropped++;
		kfree_skb(skb);
	}
	goto done;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void virtnet_netpoll(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

	napi_schedule(&vi->napi);
}
#endif

static int virtnet_open(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

	napi_enable(&vi->napi);

	/* If all buffers were filled by other side before we napi_enabled, we
	 * won't get another interrupt, so process any outstanding packets
	 * now.  virtnet_poll wants to re-enable the queue, so we disable here.
	 * We synchronize against interrupts via NAPI_STATE_SCHED */
	if (netif_rx_schedule_prep(dev, &vi->napi)) {
		vi->rvq->vq_ops->disable_cb(vi->rvq);
		__netif_rx_schedule(dev, &vi->napi);
	}
	return 0;
}

static int virtnet_close(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

	napi_disable(&vi->napi);

	return 0;
}

static int virtnet_set_tx_csum(struct net_device *dev, u32 data)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtio_device *vdev = vi->vdev;

	if (data && !virtio_has_feature(vdev, VIRTIO_NET_F_CSUM))
		return -ENOSYS;

	return ethtool_op_set_tx_hw_csum(dev, data);
}

static struct ethtool_ops virtnet_ethtool_ops = {
	.set_tx_csum = virtnet_set_tx_csum,
	.set_sg = ethtool_op_set_sg,
};

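/* Device probe: allocate the net_device, translate negotiated virtio
 * features into netdev feature flags, find the two virtqueues
 * (receive first, then send), register the netdev and prime the
 * receive ring. */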
static int virtnet_probe(struct virtio_device *vdev)
{
	int err;
	struct net_device *dev;
	struct virtnet_info *vi;

	/* Allocate ourselves a network device with room for our info */
	dev = alloc_etherdev(sizeof(struct virtnet_info));
	if (!dev)
		return -ENOMEM;

	/* Set up network device as normal. */
	dev->open = virtnet_open;
	dev->stop = virtnet_close;
	dev->hard_start_xmit = start_xmit;
	dev->features = NETIF_F_HIGHDMA;
#ifdef CONFIG_NET_POLL_CONTROLLER
	dev->poll_controller = virtnet_netpoll;
#endif
	SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
	SET_NETDEV_DEV(dev, &vdev->dev);

	/* Do we support "hardware" checksums? */
	if (csum && virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
		/* This opens up the world of extra features. */
		dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
			dev->features |= NETIF_F_TSO | NETIF_F_UFO
				| NETIF_F_TSO_ECN | NETIF_F_TSO6;
		}
		/* Individual feature bits: what can host handle? */
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
			dev->features |= NETIF_F_TSO;
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
			dev->features |= NETIF_F_TSO6;
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
			dev->features |= NETIF_F_TSO_ECN;
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
			dev->features |= NETIF_F_UFO;
	}

	/* Configuration may specify what MAC to use.  Otherwise random. */
	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
		vdev->config->get(vdev,
				  offsetof(struct virtio_net_config, mac),
				  dev->dev_addr, dev->addr_len);
	} else
		random_ether_addr(dev->dev_addr);

	/* Set up our device-specific information */
	vi = netdev_priv(dev);
	netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
	vi->dev = dev;
	vi->vdev = vdev;
	vdev->priv = vi;
	vi->pages = NULL;

	/* If they give us a callback when all buffers are done, we don't need
	 * the timer. */
	vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY);

	/* If we can receive ANY GSO packets, we must allocate large ones. */
	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4)
	    || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)
	    || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
		vi->big_packets = true;

	/* We expect two virtqueues, receive then send. */
	vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
	if (IS_ERR(vi->rvq)) {
		err = PTR_ERR(vi->rvq);
		goto free;
	}

	vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done);
	if (IS_ERR(vi->svq)) {
		err = PTR_ERR(vi->svq);
		goto free_recv;
	}

	/* Initialize our empty receive and send queues. */
	skb_queue_head_init(&vi->recv);
	skb_queue_head_init(&vi->send);

	tasklet_init(&vi->tasklet, xmit_tasklet, (unsigned long)vi);

	if (!vi->free_in_tasklet)
		setup_timer(&vi->xmit_free_timer, xmit_free, (unsigned long)vi);

	err = register_netdev(dev);
	if (err) {
		pr_debug("virtio_net: registering device failed\n");
		goto free_send;
	}

	/* Last of all, set up some receive buffers. */
	try_fill_recv(vi);

	/* If we didn't even get one input buffer, we're useless. */
	if (vi->num == 0) {
		err = -ENOMEM;
		goto unregister;
	}

	pr_debug("virtnet: registered device %s\n", dev->name);
	return 0;

unregister:
	unregister_netdev(dev);
free_send:
	vdev->config->del_vq(vi->svq);
free_recv:
	vdev->config->del_vq(vi->rvq);
free:
	free_netdev(dev);
	return err;
}

static void virtnet_remove(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;
	struct sk_buff *skb;

	/* Stop all the virtqueues. */
	vdev->config->reset(vdev);

	if (!vi->free_in_tasklet)
		del_timer_sync(&vi->xmit_free_timer);

	/* Free our skbs in send and recv queues, if any. */
	while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
		kfree_skb(skb);
		vi->num--;
	}
	__skb_queue_purge(&vi->send);

	BUG_ON(vi->num != 0);

	vdev->config->del_vq(vi->svq);
	vdev->config->del_vq(vi->rvq);
	unregister_netdev(vi->dev);

	while (vi->pages)
		__free_pages(get_a_page(vi, GFP_KERNEL), 0);

	free_netdev(vi->dev);
}

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

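/* Feature bits this driver understands; the virtio core uses this
 * table to decide which device-offered features to accept. */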
static unsigned int features[] = {
	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
	VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
	VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */
	VIRTIO_F_NOTIFY_ON_EMPTY,
};

static struct virtio_driver virtio_net = {
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
	.driver.name =	KBUILD_MODNAME,
	.driver.owner =	THIS_MODULE,
	.id_table =	id_table,
	.probe =	virtnet_probe,
	.remove =	__devexit_p(virtnet_remove),
};

static int __init init(void)
{
	return register_virtio_driver(&virtio_net);
}

static void __exit fini(void)
{
	unregister_virtio_driver(&virtio_net);
}
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio network driver");
MODULE_LICENSE("GPL");