xref: /openbmc/linux/drivers/net/virtio_net.c (revision e174961ca1a0b28f7abf0be47973ad57cb74e5f0)
1296f96fcSRusty Russell /* A simple network driver using virtio.
2296f96fcSRusty Russell  *
3296f96fcSRusty Russell  * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
4296f96fcSRusty Russell  *
5296f96fcSRusty Russell  * This program is free software; you can redistribute it and/or modify
6296f96fcSRusty Russell  * it under the terms of the GNU General Public License as published by
7296f96fcSRusty Russell  * the Free Software Foundation; either version 2 of the License, or
8296f96fcSRusty Russell  * (at your option) any later version.
9296f96fcSRusty Russell  *
10296f96fcSRusty Russell  * This program is distributed in the hope that it will be useful,
11296f96fcSRusty Russell  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12296f96fcSRusty Russell  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13296f96fcSRusty Russell  * GNU General Public License for more details.
14296f96fcSRusty Russell  *
15296f96fcSRusty Russell  * You should have received a copy of the GNU General Public License
16296f96fcSRusty Russell  * along with this program; if not, write to the Free Software
17296f96fcSRusty Russell  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18296f96fcSRusty Russell  */
19296f96fcSRusty Russell //#define DEBUG
20296f96fcSRusty Russell #include <linux/netdevice.h>
21296f96fcSRusty Russell #include <linux/etherdevice.h>
22a9ea3fc6SHerbert Xu #include <linux/ethtool.h>
23296f96fcSRusty Russell #include <linux/module.h>
24296f96fcSRusty Russell #include <linux/virtio.h>
25296f96fcSRusty Russell #include <linux/virtio_net.h>
26296f96fcSRusty Russell #include <linux/scatterlist.h>
27296f96fcSRusty Russell 
286c0cd7c0SDor Laor static int napi_weight = 128;
296c0cd7c0SDor Laor module_param(napi_weight, int, 0444);
306c0cd7c0SDor Laor 
3134a48579SRusty Russell static int csum = 1, gso = 1;
3234a48579SRusty Russell module_param(csum, bool, 0444);
3334a48579SRusty Russell module_param(gso, bool, 0444);
3434a48579SRusty Russell 
/* Size of the linear area of every receive skb: an Ethernet header plus the
 * standard Ethernet payload.  FIXME: the MTU should come from config. */
#define MAX_PACKET_LEN (ETH_HLEN + ETH_DATA_LEN)
37296f96fcSRusty Russell 
38296f96fcSRusty Russell struct virtnet_info
39296f96fcSRusty Russell {
40296f96fcSRusty Russell 	struct virtio_device *vdev;
41296f96fcSRusty Russell 	struct virtqueue *rvq, *svq;
42296f96fcSRusty Russell 	struct net_device *dev;
43296f96fcSRusty Russell 	struct napi_struct napi;
44296f96fcSRusty Russell 
4599ffc696SRusty Russell 	/* The skb we couldn't send because buffers were full. */
4699ffc696SRusty Russell 	struct sk_buff *last_xmit_skb;
4799ffc696SRusty Russell 
48363f1514SRusty Russell 	/* If we need to free in a timer, this is it. */
4914c998f0SMark McLoughlin 	struct timer_list xmit_free_timer;
5014c998f0SMark McLoughlin 
51296f96fcSRusty Russell 	/* Number of input buffers, and max we've ever had. */
52296f96fcSRusty Russell 	unsigned int num, max;
53296f96fcSRusty Russell 
5411a3a154SRusty Russell 	/* For cleaning up after transmission. */
5511a3a154SRusty Russell 	struct tasklet_struct tasklet;
56363f1514SRusty Russell 	bool free_in_tasklet;
5711a3a154SRusty Russell 
5897402b96SHerbert Xu 	/* I like... big packets and I cannot lie! */
5997402b96SHerbert Xu 	bool big_packets;
6097402b96SHerbert Xu 
61296f96fcSRusty Russell 	/* Receive & send queues. */
62296f96fcSRusty Russell 	struct sk_buff_head recv;
63296f96fcSRusty Russell 	struct sk_buff_head send;
64fb6813f4SRusty Russell 
65fb6813f4SRusty Russell 	/* Chain pages by the private ptr. */
66fb6813f4SRusty Russell 	struct page *pages;
67296f96fcSRusty Russell };
68296f96fcSRusty Russell 
69296f96fcSRusty Russell static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb)
70296f96fcSRusty Russell {
71296f96fcSRusty Russell 	return (struct virtio_net_hdr *)skb->cb;
72296f96fcSRusty Russell }
73296f96fcSRusty Russell 
74296f96fcSRusty Russell static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb)
75296f96fcSRusty Russell {
76296f96fcSRusty Russell 	sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
77296f96fcSRusty Russell }
78296f96fcSRusty Russell 
79fb6813f4SRusty Russell static void give_a_page(struct virtnet_info *vi, struct page *page)
80fb6813f4SRusty Russell {
81fb6813f4SRusty Russell 	page->private = (unsigned long)vi->pages;
82fb6813f4SRusty Russell 	vi->pages = page;
83fb6813f4SRusty Russell }
84fb6813f4SRusty Russell 
85fb6813f4SRusty Russell static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
86fb6813f4SRusty Russell {
87fb6813f4SRusty Russell 	struct page *p = vi->pages;
88fb6813f4SRusty Russell 
89fb6813f4SRusty Russell 	if (p)
90fb6813f4SRusty Russell 		vi->pages = (struct page *)p->private;
91fb6813f4SRusty Russell 	else
92fb6813f4SRusty Russell 		p = alloc_page(gfp_mask);
93fb6813f4SRusty Russell 	return p;
94fb6813f4SRusty Russell }
95fb6813f4SRusty Russell 
962cb9c6baSRusty Russell static void skb_xmit_done(struct virtqueue *svq)
97296f96fcSRusty Russell {
982cb9c6baSRusty Russell 	struct virtnet_info *vi = svq->vdev->priv;
99296f96fcSRusty Russell 
1002cb9c6baSRusty Russell 	/* Suppress further interrupts. */
1012cb9c6baSRusty Russell 	svq->vq_ops->disable_cb(svq);
10211a3a154SRusty Russell 
103363f1514SRusty Russell 	/* We were probably waiting for more output buffers. */
104296f96fcSRusty Russell 	netif_wake_queue(vi->dev);
10511a3a154SRusty Russell 
10611a3a154SRusty Russell 	/* Make sure we re-xmit last_xmit_skb: if there are no more packets
10711a3a154SRusty Russell 	 * queued, start_xmit won't be called. */
10811a3a154SRusty Russell 	tasklet_schedule(&vi->tasklet);
109296f96fcSRusty Russell }
110296f96fcSRusty Russell 
111296f96fcSRusty Russell static void receive_skb(struct net_device *dev, struct sk_buff *skb,
112296f96fcSRusty Russell 			unsigned len)
113296f96fcSRusty Russell {
114296f96fcSRusty Russell 	struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
11597402b96SHerbert Xu 	int err;
116296f96fcSRusty Russell 
117296f96fcSRusty Russell 	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
118296f96fcSRusty Russell 		pr_debug("%s: short packet %i\n", dev->name, len);
119296f96fcSRusty Russell 		dev->stats.rx_length_errors++;
120296f96fcSRusty Russell 		goto drop;
121296f96fcSRusty Russell 	}
122296f96fcSRusty Russell 	len -= sizeof(struct virtio_net_hdr);
123296f96fcSRusty Russell 
124fb6813f4SRusty Russell 	if (len <= MAX_PACKET_LEN) {
125fb6813f4SRusty Russell 		unsigned int i;
126fb6813f4SRusty Russell 
127fb6813f4SRusty Russell 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
128fb6813f4SRusty Russell 			give_a_page(dev->priv, skb_shinfo(skb)->frags[i].page);
129fb6813f4SRusty Russell 		skb->data_len = 0;
130fb6813f4SRusty Russell 		skb_shinfo(skb)->nr_frags = 0;
131fb6813f4SRusty Russell 	}
132fb6813f4SRusty Russell 
13397402b96SHerbert Xu 	err = pskb_trim(skb, len);
13497402b96SHerbert Xu 	if (err) {
13597402b96SHerbert Xu 		pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err);
13697402b96SHerbert Xu 		dev->stats.rx_dropped++;
13797402b96SHerbert Xu 		goto drop;
13897402b96SHerbert Xu 	}
13997402b96SHerbert Xu 	skb->truesize += skb->data_len;
140296f96fcSRusty Russell 	dev->stats.rx_bytes += skb->len;
141296f96fcSRusty Russell 	dev->stats.rx_packets++;
142296f96fcSRusty Russell 
143296f96fcSRusty Russell 	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
144296f96fcSRusty Russell 		pr_debug("Needs csum!\n");
145f35d9d8aSRusty Russell 		if (!skb_partial_csum_set(skb,hdr->csum_start,hdr->csum_offset))
146296f96fcSRusty Russell 			goto frame_err;
147296f96fcSRusty Russell 	}
148296f96fcSRusty Russell 
14923cde76dSMark McLoughlin 	skb->protocol = eth_type_trans(skb, dev);
15023cde76dSMark McLoughlin 	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
15123cde76dSMark McLoughlin 		 ntohs(skb->protocol), skb->len, skb->pkt_type);
15223cde76dSMark McLoughlin 
153296f96fcSRusty Russell 	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
154296f96fcSRusty Russell 		pr_debug("GSO!\n");
15534a48579SRusty Russell 		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
156296f96fcSRusty Russell 		case VIRTIO_NET_HDR_GSO_TCPV4:
157296f96fcSRusty Russell 			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
158296f96fcSRusty Russell 			break;
159296f96fcSRusty Russell 		case VIRTIO_NET_HDR_GSO_UDP:
160296f96fcSRusty Russell 			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
161296f96fcSRusty Russell 			break;
162296f96fcSRusty Russell 		case VIRTIO_NET_HDR_GSO_TCPV6:
163296f96fcSRusty Russell 			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
164296f96fcSRusty Russell 			break;
165296f96fcSRusty Russell 		default:
166296f96fcSRusty Russell 			if (net_ratelimit())
167296f96fcSRusty Russell 				printk(KERN_WARNING "%s: bad gso type %u.\n",
168296f96fcSRusty Russell 				       dev->name, hdr->gso_type);
169296f96fcSRusty Russell 			goto frame_err;
170296f96fcSRusty Russell 		}
171296f96fcSRusty Russell 
17234a48579SRusty Russell 		if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
17334a48579SRusty Russell 			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
17434a48579SRusty Russell 
175296f96fcSRusty Russell 		skb_shinfo(skb)->gso_size = hdr->gso_size;
176296f96fcSRusty Russell 		if (skb_shinfo(skb)->gso_size == 0) {
177296f96fcSRusty Russell 			if (net_ratelimit())
178296f96fcSRusty Russell 				printk(KERN_WARNING "%s: zero gso size.\n",
179296f96fcSRusty Russell 				       dev->name);
180296f96fcSRusty Russell 			goto frame_err;
181296f96fcSRusty Russell 		}
182296f96fcSRusty Russell 
183296f96fcSRusty Russell 		/* Header must be checked, and gso_segs computed. */
184296f96fcSRusty Russell 		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
185296f96fcSRusty Russell 		skb_shinfo(skb)->gso_segs = 0;
186296f96fcSRusty Russell 	}
187296f96fcSRusty Russell 
188296f96fcSRusty Russell 	netif_receive_skb(skb);
189296f96fcSRusty Russell 	return;
190296f96fcSRusty Russell 
191296f96fcSRusty Russell frame_err:
192296f96fcSRusty Russell 	dev->stats.rx_frame_errors++;
193296f96fcSRusty Russell drop:
194296f96fcSRusty Russell 	dev_kfree_skb(skb);
195296f96fcSRusty Russell }
196296f96fcSRusty Russell 
197296f96fcSRusty Russell static void try_fill_recv(struct virtnet_info *vi)
198296f96fcSRusty Russell {
199296f96fcSRusty Russell 	struct sk_buff *skb;
20005271685SRusty Russell 	struct scatterlist sg[2+MAX_SKB_FRAGS];
20197402b96SHerbert Xu 	int num, err, i;
202296f96fcSRusty Russell 
20305271685SRusty Russell 	sg_init_table(sg, 2+MAX_SKB_FRAGS);
204296f96fcSRusty Russell 	for (;;) {
205296f96fcSRusty Russell 		skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN);
206296f96fcSRusty Russell 		if (unlikely(!skb))
207296f96fcSRusty Russell 			break;
208296f96fcSRusty Russell 
209296f96fcSRusty Russell 		skb_put(skb, MAX_PACKET_LEN);
210296f96fcSRusty Russell 		vnet_hdr_to_sg(sg, skb);
21197402b96SHerbert Xu 
21297402b96SHerbert Xu 		if (vi->big_packets) {
21397402b96SHerbert Xu 			for (i = 0; i < MAX_SKB_FRAGS; i++) {
21497402b96SHerbert Xu 				skb_frag_t *f = &skb_shinfo(skb)->frags[i];
215fb6813f4SRusty Russell 				f->page = get_a_page(vi, GFP_ATOMIC);
21697402b96SHerbert Xu 				if (!f->page)
21797402b96SHerbert Xu 					break;
21897402b96SHerbert Xu 
21997402b96SHerbert Xu 				f->page_offset = 0;
22097402b96SHerbert Xu 				f->size = PAGE_SIZE;
22197402b96SHerbert Xu 
22297402b96SHerbert Xu 				skb->data_len += PAGE_SIZE;
22397402b96SHerbert Xu 				skb->len += PAGE_SIZE;
22497402b96SHerbert Xu 
22597402b96SHerbert Xu 				skb_shinfo(skb)->nr_frags++;
22697402b96SHerbert Xu 			}
22797402b96SHerbert Xu 		}
22897402b96SHerbert Xu 
229296f96fcSRusty Russell 		num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
230296f96fcSRusty Russell 		skb_queue_head(&vi->recv, skb);
231296f96fcSRusty Russell 
232296f96fcSRusty Russell 		err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb);
233296f96fcSRusty Russell 		if (err) {
234296f96fcSRusty Russell 			skb_unlink(skb, &vi->recv);
235296f96fcSRusty Russell 			kfree_skb(skb);
236296f96fcSRusty Russell 			break;
237296f96fcSRusty Russell 		}
238296f96fcSRusty Russell 		vi->num++;
239296f96fcSRusty Russell 	}
240296f96fcSRusty Russell 	if (unlikely(vi->num > vi->max))
241296f96fcSRusty Russell 		vi->max = vi->num;
242296f96fcSRusty Russell 	vi->rvq->vq_ops->kick(vi->rvq);
243296f96fcSRusty Russell }
244296f96fcSRusty Russell 
24518445c4dSRusty Russell static void skb_recv_done(struct virtqueue *rvq)
246296f96fcSRusty Russell {
247296f96fcSRusty Russell 	struct virtnet_info *vi = rvq->vdev->priv;
24818445c4dSRusty Russell 	/* Schedule NAPI, Suppress further interrupts if successful. */
24918445c4dSRusty Russell 	if (netif_rx_schedule_prep(vi->dev, &vi->napi)) {
25018445c4dSRusty Russell 		rvq->vq_ops->disable_cb(rvq);
25118445c4dSRusty Russell 		__netif_rx_schedule(vi->dev, &vi->napi);
25218445c4dSRusty Russell 	}
253296f96fcSRusty Russell }
254296f96fcSRusty Russell 
255296f96fcSRusty Russell static int virtnet_poll(struct napi_struct *napi, int budget)
256296f96fcSRusty Russell {
257296f96fcSRusty Russell 	struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
258296f96fcSRusty Russell 	struct sk_buff *skb = NULL;
259296f96fcSRusty Russell 	unsigned int len, received = 0;
260296f96fcSRusty Russell 
261296f96fcSRusty Russell again:
262296f96fcSRusty Russell 	while (received < budget &&
263296f96fcSRusty Russell 	       (skb = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) {
264296f96fcSRusty Russell 		__skb_unlink(skb, &vi->recv);
265296f96fcSRusty Russell 		receive_skb(vi->dev, skb, len);
266296f96fcSRusty Russell 		vi->num--;
267296f96fcSRusty Russell 		received++;
268296f96fcSRusty Russell 	}
269296f96fcSRusty Russell 
270296f96fcSRusty Russell 	/* FIXME: If we oom and completely run out of inbufs, we need
271296f96fcSRusty Russell 	 * to start a timer trying to fill more. */
272296f96fcSRusty Russell 	if (vi->num < vi->max / 2)
273296f96fcSRusty Russell 		try_fill_recv(vi);
274296f96fcSRusty Russell 
2758329d98eSRusty Russell 	/* Out of packets? */
2768329d98eSRusty Russell 	if (received < budget) {
277296f96fcSRusty Russell 		netif_rx_complete(vi->dev, napi);
27818445c4dSRusty Russell 		if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq))
2794265f161SChristian Borntraeger 		    && napi_schedule_prep(napi)) {
2804265f161SChristian Borntraeger 			vi->rvq->vq_ops->disable_cb(vi->rvq);
2814265f161SChristian Borntraeger 			__netif_rx_schedule(vi->dev, napi);
282296f96fcSRusty Russell 			goto again;
283296f96fcSRusty Russell 		}
2844265f161SChristian Borntraeger 	}
285296f96fcSRusty Russell 
286296f96fcSRusty Russell 	return received;
287296f96fcSRusty Russell }
288296f96fcSRusty Russell 
289296f96fcSRusty Russell static void free_old_xmit_skbs(struct virtnet_info *vi)
290296f96fcSRusty Russell {
291296f96fcSRusty Russell 	struct sk_buff *skb;
292296f96fcSRusty Russell 	unsigned int len;
293296f96fcSRusty Russell 
294296f96fcSRusty Russell 	while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) {
295296f96fcSRusty Russell 		pr_debug("Sent skb %p\n", skb);
296296f96fcSRusty Russell 		__skb_unlink(skb, &vi->send);
297655aa31fSRusty Russell 		vi->dev->stats.tx_bytes += skb->len;
298296f96fcSRusty Russell 		vi->dev->stats.tx_packets++;
299296f96fcSRusty Russell 		kfree_skb(skb);
300296f96fcSRusty Russell 	}
301296f96fcSRusty Russell }
302296f96fcSRusty Russell 
303363f1514SRusty Russell /* If the virtio transport doesn't always notify us when all in-flight packets
304363f1514SRusty Russell  * are consumed, we fall back to using this function on a timer to free them. */
30514c998f0SMark McLoughlin static void xmit_free(unsigned long data)
30614c998f0SMark McLoughlin {
30714c998f0SMark McLoughlin 	struct virtnet_info *vi = (void *)data;
30814c998f0SMark McLoughlin 
30914c998f0SMark McLoughlin 	netif_tx_lock(vi->dev);
31014c998f0SMark McLoughlin 
31114c998f0SMark McLoughlin 	free_old_xmit_skbs(vi);
31214c998f0SMark McLoughlin 
31314c998f0SMark McLoughlin 	if (!skb_queue_empty(&vi->send))
31414c998f0SMark McLoughlin 		mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10));
31514c998f0SMark McLoughlin 
31614c998f0SMark McLoughlin 	netif_tx_unlock(vi->dev);
31714c998f0SMark McLoughlin }
31814c998f0SMark McLoughlin 
31999ffc696SRusty Russell static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
320296f96fcSRusty Russell {
32114c998f0SMark McLoughlin 	int num, err;
32205271685SRusty Russell 	struct scatterlist sg[2+MAX_SKB_FRAGS];
323296f96fcSRusty Russell 	struct virtio_net_hdr *hdr;
324296f96fcSRusty Russell 	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
325296f96fcSRusty Russell 
32605271685SRusty Russell 	sg_init_table(sg, 2+MAX_SKB_FRAGS);
3274d125de3SRusty Russell 
328*e174961cSJohannes Berg 	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
329296f96fcSRusty Russell 
330296f96fcSRusty Russell 	/* Encode metadata header at front. */
331296f96fcSRusty Russell 	hdr = skb_vnet_hdr(skb);
332296f96fcSRusty Russell 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
333296f96fcSRusty Russell 		hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
334296f96fcSRusty Russell 		hdr->csum_start = skb->csum_start - skb_headroom(skb);
335296f96fcSRusty Russell 		hdr->csum_offset = skb->csum_offset;
336296f96fcSRusty Russell 	} else {
337296f96fcSRusty Russell 		hdr->flags = 0;
338296f96fcSRusty Russell 		hdr->csum_offset = hdr->csum_start = 0;
339296f96fcSRusty Russell 	}
340296f96fcSRusty Russell 
341296f96fcSRusty Russell 	if (skb_is_gso(skb)) {
34250c8ea80SRusty Russell 		hdr->hdr_len = skb_transport_header(skb) - skb->data;
343296f96fcSRusty Russell 		hdr->gso_size = skb_shinfo(skb)->gso_size;
34434a48579SRusty Russell 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
345296f96fcSRusty Russell 			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
346296f96fcSRusty Russell 		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
347296f96fcSRusty Russell 			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
348296f96fcSRusty Russell 		else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
349296f96fcSRusty Russell 			hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
350296f96fcSRusty Russell 		else
351296f96fcSRusty Russell 			BUG();
35234a48579SRusty Russell 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
35334a48579SRusty Russell 			hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
354296f96fcSRusty Russell 	} else {
355296f96fcSRusty Russell 		hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
35650c8ea80SRusty Russell 		hdr->gso_size = hdr->hdr_len = 0;
357296f96fcSRusty Russell 	}
358296f96fcSRusty Russell 
359296f96fcSRusty Russell 	vnet_hdr_to_sg(sg, skb);
360296f96fcSRusty Russell 	num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
36199ffc696SRusty Russell 
36214c998f0SMark McLoughlin 	err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
363363f1514SRusty Russell 	if (!err && !vi->free_in_tasklet)
36414c998f0SMark McLoughlin 		mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10));
36514c998f0SMark McLoughlin 
36614c998f0SMark McLoughlin 	return err;
36799ffc696SRusty Russell }
36899ffc696SRusty Russell 
36911a3a154SRusty Russell static void xmit_tasklet(unsigned long data)
37011a3a154SRusty Russell {
37111a3a154SRusty Russell 	struct virtnet_info *vi = (void *)data;
37211a3a154SRusty Russell 
37311a3a154SRusty Russell 	netif_tx_lock_bh(vi->dev);
37411a3a154SRusty Russell 	if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) == 0) {
37511a3a154SRusty Russell 		vi->svq->vq_ops->kick(vi->svq);
37611a3a154SRusty Russell 		vi->last_xmit_skb = NULL;
37711a3a154SRusty Russell 	}
378363f1514SRusty Russell 	if (vi->free_in_tasklet)
379363f1514SRusty Russell 		free_old_xmit_skbs(vi);
38011a3a154SRusty Russell 	netif_tx_unlock_bh(vi->dev);
38111a3a154SRusty Russell }
38211a3a154SRusty Russell 
38399ffc696SRusty Russell static int start_xmit(struct sk_buff *skb, struct net_device *dev)
38499ffc696SRusty Russell {
38599ffc696SRusty Russell 	struct virtnet_info *vi = netdev_priv(dev);
3862cb9c6baSRusty Russell 
3872cb9c6baSRusty Russell again:
3882cb9c6baSRusty Russell 	/* Free up any pending old buffers before queueing new ones. */
3892cb9c6baSRusty Russell 	free_old_xmit_skbs(vi);
39099ffc696SRusty Russell 
39199ffc696SRusty Russell 	/* If we has a buffer left over from last time, send it now. */
3929953ca6cSMark McLoughlin 	if (unlikely(vi->last_xmit_skb) &&
3939953ca6cSMark McLoughlin 	    xmit_skb(vi, vi->last_xmit_skb) != 0)
39499ffc696SRusty Russell 		goto stop_queue;
3959953ca6cSMark McLoughlin 
39699ffc696SRusty Russell 	vi->last_xmit_skb = NULL;
39799ffc696SRusty Russell 
39899ffc696SRusty Russell 	/* Put new one in send queue and do transmit */
3997eb2e251SRusty Russell 	if (likely(skb)) {
40099ffc696SRusty Russell 		__skb_queue_head(&vi->send, skb);
40199ffc696SRusty Russell 		if (xmit_skb(vi, skb) != 0) {
40299ffc696SRusty Russell 			vi->last_xmit_skb = skb;
4037eb2e251SRusty Russell 			skb = NULL;
40499ffc696SRusty Russell 			goto stop_queue;
40599ffc696SRusty Russell 		}
4067eb2e251SRusty Russell 	}
40799ffc696SRusty Russell done:
40899ffc696SRusty Russell 	vi->svq->vq_ops->kick(vi->svq);
40999ffc696SRusty Russell 	return NETDEV_TX_OK;
41099ffc696SRusty Russell 
41199ffc696SRusty Russell stop_queue:
412296f96fcSRusty Russell 	pr_debug("%s: virtio not prepared to send\n", dev->name);
413296f96fcSRusty Russell 	netif_stop_queue(dev);
4142cb9c6baSRusty Russell 
4154265f161SChristian Borntraeger 	/* Activate callback for using skbs: if this returns false it
4162cb9c6baSRusty Russell 	 * means some were used in the meantime. */
4172cb9c6baSRusty Russell 	if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
4184265f161SChristian Borntraeger 		vi->svq->vq_ops->disable_cb(vi->svq);
4192cb9c6baSRusty Russell 		netif_start_queue(dev);
4202cb9c6baSRusty Russell 		goto again;
4212cb9c6baSRusty Russell 	}
4229953ca6cSMark McLoughlin 	if (skb) {
4239953ca6cSMark McLoughlin 		/* Drop this skb: we only queue one. */
4249953ca6cSMark McLoughlin 		vi->dev->stats.tx_dropped++;
4259953ca6cSMark McLoughlin 		kfree_skb(skb);
4269953ca6cSMark McLoughlin 	}
42799ffc696SRusty Russell 	goto done;
428296f96fcSRusty Russell }
429296f96fcSRusty Russell 
#ifdef CONFIG_NET_POLL_CONTROLLER
/* poll_controller hook: just schedule the device's NAPI context. */
static void virtnet_netpoll(struct net_device *dev)
{
	struct virtnet_info *info = netdev_priv(dev);
	struct napi_struct *napi = &info->napi;

	napi_schedule(napi);
}
#endif
438da74e89dSAmit Shah 
439296f96fcSRusty Russell static int virtnet_open(struct net_device *dev)
440296f96fcSRusty Russell {
441296f96fcSRusty Russell 	struct virtnet_info *vi = netdev_priv(dev);
442296f96fcSRusty Russell 
443296f96fcSRusty Russell 	napi_enable(&vi->napi);
444a48bd8f6SRusty Russell 
445a48bd8f6SRusty Russell 	/* If all buffers were filled by other side before we napi_enabled, we
446a48bd8f6SRusty Russell 	 * won't get another interrupt, so process any outstanding packets
447370076d9SChristian Borntraeger 	 * now.  virtnet_poll wants re-enable the queue, so we disable here.
448370076d9SChristian Borntraeger 	 * We synchronize against interrupts via NAPI_STATE_SCHED */
449370076d9SChristian Borntraeger 	if (netif_rx_schedule_prep(dev, &vi->napi)) {
450a48bd8f6SRusty Russell 		vi->rvq->vq_ops->disable_cb(vi->rvq);
451370076d9SChristian Borntraeger 		__netif_rx_schedule(dev, &vi->napi);
452370076d9SChristian Borntraeger 	}
453296f96fcSRusty Russell 	return 0;
454296f96fcSRusty Russell }
455296f96fcSRusty Russell 
456296f96fcSRusty Russell static int virtnet_close(struct net_device *dev)
457296f96fcSRusty Russell {
458296f96fcSRusty Russell 	struct virtnet_info *vi = netdev_priv(dev);
459296f96fcSRusty Russell 
460296f96fcSRusty Russell 	napi_disable(&vi->napi);
461296f96fcSRusty Russell 
462296f96fcSRusty Russell 	return 0;
463296f96fcSRusty Russell }
464296f96fcSRusty Russell 
465a9ea3fc6SHerbert Xu static int virtnet_set_tx_csum(struct net_device *dev, u32 data)
466a9ea3fc6SHerbert Xu {
467a9ea3fc6SHerbert Xu 	struct virtnet_info *vi = netdev_priv(dev);
468a9ea3fc6SHerbert Xu 	struct virtio_device *vdev = vi->vdev;
469a9ea3fc6SHerbert Xu 
470a9ea3fc6SHerbert Xu 	if (data && !virtio_has_feature(vdev, VIRTIO_NET_F_CSUM))
471a9ea3fc6SHerbert Xu 		return -ENOSYS;
472a9ea3fc6SHerbert Xu 
473a9ea3fc6SHerbert Xu 	return ethtool_op_set_tx_hw_csum(dev, data);
474a9ea3fc6SHerbert Xu }
475a9ea3fc6SHerbert Xu 
476a9ea3fc6SHerbert Xu static struct ethtool_ops virtnet_ethtool_ops = {
477a9ea3fc6SHerbert Xu 	.set_tx_csum = virtnet_set_tx_csum,
478a9ea3fc6SHerbert Xu 	.set_sg = ethtool_op_set_sg,
479a9ea3fc6SHerbert Xu };
480a9ea3fc6SHerbert Xu 
481296f96fcSRusty Russell static int virtnet_probe(struct virtio_device *vdev)
482296f96fcSRusty Russell {
483296f96fcSRusty Russell 	int err;
484296f96fcSRusty Russell 	struct net_device *dev;
485296f96fcSRusty Russell 	struct virtnet_info *vi;
486296f96fcSRusty Russell 
487296f96fcSRusty Russell 	/* Allocate ourselves a network device with room for our info */
488296f96fcSRusty Russell 	dev = alloc_etherdev(sizeof(struct virtnet_info));
489296f96fcSRusty Russell 	if (!dev)
490296f96fcSRusty Russell 		return -ENOMEM;
491296f96fcSRusty Russell 
492296f96fcSRusty Russell 	/* Set up network device as normal. */
493296f96fcSRusty Russell 	dev->open = virtnet_open;
494296f96fcSRusty Russell 	dev->stop = virtnet_close;
495296f96fcSRusty Russell 	dev->hard_start_xmit = start_xmit;
496296f96fcSRusty Russell 	dev->features = NETIF_F_HIGHDMA;
497da74e89dSAmit Shah #ifdef CONFIG_NET_POLL_CONTROLLER
498da74e89dSAmit Shah 	dev->poll_controller = virtnet_netpoll;
499da74e89dSAmit Shah #endif
500a9ea3fc6SHerbert Xu 	SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
501296f96fcSRusty Russell 	SET_NETDEV_DEV(dev, &vdev->dev);
502296f96fcSRusty Russell 
503296f96fcSRusty Russell 	/* Do we support "hardware" checksums? */
504c45a6816SRusty Russell 	if (csum && virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
505296f96fcSRusty Russell 		/* This opens up the world of extra features. */
506296f96fcSRusty Russell 		dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
507c45a6816SRusty Russell 		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
50834a48579SRusty Russell 			dev->features |= NETIF_F_TSO | NETIF_F_UFO
50934a48579SRusty Russell 				| NETIF_F_TSO_ECN | NETIF_F_TSO6;
51034a48579SRusty Russell 		}
5115539ae96SRusty Russell 		/* Individual feature bits: what can host handle? */
512c45a6816SRusty Russell 		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
5135539ae96SRusty Russell 			dev->features |= NETIF_F_TSO;
514c45a6816SRusty Russell 		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
5155539ae96SRusty Russell 			dev->features |= NETIF_F_TSO6;
516c45a6816SRusty Russell 		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
5175539ae96SRusty Russell 			dev->features |= NETIF_F_TSO_ECN;
518c45a6816SRusty Russell 		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
5195539ae96SRusty Russell 			dev->features |= NETIF_F_UFO;
520296f96fcSRusty Russell 	}
521296f96fcSRusty Russell 
522296f96fcSRusty Russell 	/* Configuration may specify what MAC to use.  Otherwise random. */
523c45a6816SRusty Russell 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
524a586d4f6SRusty Russell 		vdev->config->get(vdev,
525a586d4f6SRusty Russell 				  offsetof(struct virtio_net_config, mac),
526a586d4f6SRusty Russell 				  dev->dev_addr, dev->addr_len);
527296f96fcSRusty Russell 	} else
528296f96fcSRusty Russell 		random_ether_addr(dev->dev_addr);
529296f96fcSRusty Russell 
530296f96fcSRusty Russell 	/* Set up our device-specific information */
531296f96fcSRusty Russell 	vi = netdev_priv(dev);
5326c0cd7c0SDor Laor 	netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
533296f96fcSRusty Russell 	vi->dev = dev;
534296f96fcSRusty Russell 	vi->vdev = vdev;
535d9d5dcc8SChristian Borntraeger 	vdev->priv = vi;
536fb6813f4SRusty Russell 	vi->pages = NULL;
537296f96fcSRusty Russell 
538363f1514SRusty Russell 	/* If they give us a callback when all buffers are done, we don't need
539363f1514SRusty Russell 	 * the timer. */
540363f1514SRusty Russell 	vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY);
541363f1514SRusty Russell 
54297402b96SHerbert Xu 	/* If we can receive ANY GSO packets, we must allocate large ones. */
54397402b96SHerbert Xu 	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4)
54497402b96SHerbert Xu 	    || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)
54597402b96SHerbert Xu 	    || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
54697402b96SHerbert Xu 		vi->big_packets = true;
54797402b96SHerbert Xu 
548296f96fcSRusty Russell 	/* We expect two virtqueues, receive then send. */
549a586d4f6SRusty Russell 	vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
550296f96fcSRusty Russell 	if (IS_ERR(vi->rvq)) {
551296f96fcSRusty Russell 		err = PTR_ERR(vi->rvq);
552296f96fcSRusty Russell 		goto free;
553296f96fcSRusty Russell 	}
554296f96fcSRusty Russell 
555a586d4f6SRusty Russell 	vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done);
556296f96fcSRusty Russell 	if (IS_ERR(vi->svq)) {
557296f96fcSRusty Russell 		err = PTR_ERR(vi->svq);
558296f96fcSRusty Russell 		goto free_recv;
559296f96fcSRusty Russell 	}
560296f96fcSRusty Russell 
561296f96fcSRusty Russell 	/* Initialize our empty receive and send queues. */
562296f96fcSRusty Russell 	skb_queue_head_init(&vi->recv);
563296f96fcSRusty Russell 	skb_queue_head_init(&vi->send);
564296f96fcSRusty Russell 
56511a3a154SRusty Russell 	tasklet_init(&vi->tasklet, xmit_tasklet, (unsigned long)vi);
56611a3a154SRusty Russell 
567363f1514SRusty Russell 	if (!vi->free_in_tasklet)
56814c998f0SMark McLoughlin 		setup_timer(&vi->xmit_free_timer, xmit_free, (unsigned long)vi);
56914c998f0SMark McLoughlin 
570296f96fcSRusty Russell 	err = register_netdev(dev);
571296f96fcSRusty Russell 	if (err) {
572296f96fcSRusty Russell 		pr_debug("virtio_net: registering device failed\n");
573296f96fcSRusty Russell 		goto free_send;
574296f96fcSRusty Russell 	}
575b3369c1fSRusty Russell 
576b3369c1fSRusty Russell 	/* Last of all, set up some receive buffers. */
577b3369c1fSRusty Russell 	try_fill_recv(vi);
578b3369c1fSRusty Russell 
579b3369c1fSRusty Russell 	/* If we didn't even get one input buffer, we're useless. */
580b3369c1fSRusty Russell 	if (vi->num == 0) {
581b3369c1fSRusty Russell 		err = -ENOMEM;
582b3369c1fSRusty Russell 		goto unregister;
583b3369c1fSRusty Russell 	}
584b3369c1fSRusty Russell 
585296f96fcSRusty Russell 	pr_debug("virtnet: registered device %s\n", dev->name);
586296f96fcSRusty Russell 	return 0;
587296f96fcSRusty Russell 
588b3369c1fSRusty Russell unregister:
589b3369c1fSRusty Russell 	unregister_netdev(dev);
590296f96fcSRusty Russell free_send:
591296f96fcSRusty Russell 	vdev->config->del_vq(vi->svq);
592296f96fcSRusty Russell free_recv:
593296f96fcSRusty Russell 	vdev->config->del_vq(vi->rvq);
594296f96fcSRusty Russell free:
595296f96fcSRusty Russell 	free_netdev(dev);
596296f96fcSRusty Russell 	return err;
597296f96fcSRusty Russell }
598296f96fcSRusty Russell 
599296f96fcSRusty Russell static void virtnet_remove(struct virtio_device *vdev)
600296f96fcSRusty Russell {
60174b2553fSRusty Russell 	struct virtnet_info *vi = vdev->priv;
602b3369c1fSRusty Russell 	struct sk_buff *skb;
603b3369c1fSRusty Russell 
6046e5aa7efSRusty Russell 	/* Stop all the virtqueues. */
6056e5aa7efSRusty Russell 	vdev->config->reset(vdev);
6066e5aa7efSRusty Russell 
607363f1514SRusty Russell 	if (!vi->free_in_tasklet)
60814c998f0SMark McLoughlin 		del_timer_sync(&vi->xmit_free_timer);
60914c998f0SMark McLoughlin 
610b3369c1fSRusty Russell 	/* Free our skbs in send and recv queues, if any. */
611b3369c1fSRusty Russell 	while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
612b3369c1fSRusty Russell 		kfree_skb(skb);
613b3369c1fSRusty Russell 		vi->num--;
614b3369c1fSRusty Russell 	}
615288369ccSWang Chen 	__skb_queue_purge(&vi->send);
616b3369c1fSRusty Russell 
617b3369c1fSRusty Russell 	BUG_ON(vi->num != 0);
61874b2553fSRusty Russell 
61974b2553fSRusty Russell 	vdev->config->del_vq(vi->svq);
62074b2553fSRusty Russell 	vdev->config->del_vq(vi->rvq);
62174b2553fSRusty Russell 	unregister_netdev(vi->dev);
622fb6813f4SRusty Russell 
623fb6813f4SRusty Russell 	while (vi->pages)
624fb6813f4SRusty Russell 		__free_pages(get_a_page(vi, GFP_KERNEL), 0);
625fb6813f4SRusty Russell 
62674b2553fSRusty Russell 	free_netdev(vi->dev);
627296f96fcSRusty Russell }
628296f96fcSRusty Russell 
629296f96fcSRusty Russell static struct virtio_device_id id_table[] = {
630296f96fcSRusty Russell 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
631296f96fcSRusty Russell 	{ 0 },
632296f96fcSRusty Russell };
633296f96fcSRusty Russell 
634c45a6816SRusty Russell static unsigned int features[] = {
6355e4fe5c4SMark McLoughlin 	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
6365e4fe5c4SMark McLoughlin 	VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
637c45a6816SRusty Russell 	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
63897402b96SHerbert Xu 	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
63997402b96SHerbert Xu 	VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */
64097402b96SHerbert Xu 	VIRTIO_F_NOTIFY_ON_EMPTY,
641c45a6816SRusty Russell };
642c45a6816SRusty Russell 
643296f96fcSRusty Russell static struct virtio_driver virtio_net = {
644c45a6816SRusty Russell 	.feature_table = features,
645c45a6816SRusty Russell 	.feature_table_size = ARRAY_SIZE(features),
646296f96fcSRusty Russell 	.driver.name =	KBUILD_MODNAME,
647296f96fcSRusty Russell 	.driver.owner =	THIS_MODULE,
648296f96fcSRusty Russell 	.id_table =	id_table,
649296f96fcSRusty Russell 	.probe =	virtnet_probe,
650296f96fcSRusty Russell 	.remove =	__devexit_p(virtnet_remove),
651296f96fcSRusty Russell };
652296f96fcSRusty Russell 
653296f96fcSRusty Russell static int __init init(void)
654296f96fcSRusty Russell {
655296f96fcSRusty Russell 	return register_virtio_driver(&virtio_net);
656296f96fcSRusty Russell }
657296f96fcSRusty Russell 
658296f96fcSRusty Russell static void __exit fini(void)
659296f96fcSRusty Russell {
660296f96fcSRusty Russell 	unregister_virtio_driver(&virtio_net);
661296f96fcSRusty Russell }
662296f96fcSRusty Russell module_init(init);
663296f96fcSRusty Russell module_exit(fini);
664296f96fcSRusty Russell 
665296f96fcSRusty Russell MODULE_DEVICE_TABLE(virtio, id_table);
666296f96fcSRusty Russell MODULE_DESCRIPTION("Virtio network driver");
667296f96fcSRusty Russell MODULE_LICENSE("GPL");
668