/*
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/net/xen-netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "common.h"

#include <linux/kthread.h>
#include <linux/if_vlan.h>
#include <linux/udp.h>
#include <linux/highmem.h>

#include <net/tcp.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/memory.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>

/* Provide an option to disable split event channels at load time as
 * event channels are a limited resource. Split event channels are
 * enabled by default.
 */
bool separate_tx_rx_irq = true;
module_param(separate_tx_rx_irq, bool, 0644);

/* When the guest ring is filled up, the qdisc queues the packets for us, but
 * we have to time them out, otherwise other guests' packets can get stuck
 * there.
 */
unsigned int rx_drain_timeout_msecs = 10000;
module_param(rx_drain_timeout_msecs, uint, 0444);
unsigned int rx_drain_timeout_jiffies;

/*
 * This is the maximum number of slots an skb can have. If a guest sends
 * an skb which exceeds this limit it is considered malicious.
 */
#define FATAL_SKB_SLOTS_DEFAULT 20
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);

static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
			       u8 status);

static void make_tx_response(struct xenvif *vif,
			     struct xen_netif_tx_request *txp,
			     s8       st);

static inline int tx_work_todo(struct xenvif *vif);
static inline int rx_work_todo(struct xenvif *vif);

static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
					     u16      id,
					     s8       st,
					     u16      offset,
					     u16      size,
					     u16      flags);

static inline unsigned long idx_to_pfn(struct xenvif *vif,
				       u16 idx)
{
	return page_to_pfn(vif->mmap_pages[idx]);
}

static inline unsigned long idx_to_kaddr(struct xenvif *vif,
					 u16 idx)
{
	return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
}

#define callback_param(vif, pending_idx) \
	(vif->pending_tx_info[pending_idx].callback_struct)

/* Find the containing VIF's structure from a pointer in the pending_tx_info
 * array.
 */
static inline struct xenvif *ubuf_to_vif(const struct ubuf_info *ubuf)
{
	u16 pending_idx = ubuf->desc;
	struct pending_tx_info *temp =
		container_of(ubuf, struct pending_tx_info, callback_struct);
	return container_of(temp - pending_idx,
			    struct xenvif,
			    pending_tx_info[0]);
}

/* This is a minimum size for the linear area to avoid lots of
 * calls to __pskb_pull_tail() as we set up checksum offsets. The
 * value 128 was chosen as it covers all IPv4 and most likely
 * IPv6 headers.
 */
#define PKT_PROT_LEN 128

static u16 frag_get_pending_idx(skb_frag_t *frag)
{
	return (u16)frag->page_offset;
}

static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
{
	frag->page_offset = pending_idx;
}

static inline pending_ring_idx_t pending_index(unsigned i)
{
	return i & (MAX_PENDING_REQS-1);
}
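
/*
 * A quick illustration of the masking above (a sketch; MAX_PENDING_REQS
 * is assumed to be a power of two, e.g. 256):
 *
 *   pending_index(0)   == 0
 *   pending_index(255) == 255
 *   pending_index(256) == 0   (the free-running counter wraps onto the ring)
 *   pending_index(257) == 1
 *
 * Producers and consumers can therefore use plain incrementing counters
 * and only reduce them modulo the ring size on access.
 */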

bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed)
{
	RING_IDX prod, cons;

	do {
		prod = vif->rx.sring->req_prod;
		cons = vif->rx.req_cons;

		if (prod - cons >= needed)
			return true;

		vif->rx.sring->req_event = prod + 1;

		/* Make sure event is visible before we check prod
		 * again.
		 */
		mb();
	} while (vif->rx.sring->req_prod != prod);

	return false;
}
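
/*
 * Note on the loop above: writing req_event re-arms the frontend's
 * notification *before* req_prod is re-read. If the frontend pushed more
 * requests between the first read and the req_event write, the final
 * re-read catches them and we go round again instead of waiting for an
 * event that might never arrive. The mb() keeps the req_event store and
 * the re-read of req_prod ordered.
 */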

/*
 * Returns true if we should start a new receive buffer instead of
 * adding 'size' bytes to a buffer which currently contains 'offset'
 * bytes.
 */
static bool start_new_rx_buffer(int offset, unsigned long size, int head)
{
	/* Simple case: we have completely filled the current buffer. */
	if (offset == MAX_BUFFER_OFFSET)
		return true;

	/*
	 * Complex case: start a fresh buffer if the current frag
	 * would overflow the current buffer but only if:
	 *     (i)   this frag would fit completely in the next buffer
	 * and (ii)  there is already some data in the current buffer
	 * and (iii) this is not the head buffer.
	 *
	 * Where:
	 * - (i) stops us splitting a frag into two copies
	 *   unless the frag is too large for a single buffer.
	 * - (ii) stops us from leaving a buffer pointlessly empty.
	 * - (iii) stops us leaving the first buffer
	 *   empty. Strictly speaking this is already covered
	 *   by (ii) but is explicitly checked because
	 *   netfront relies on the first buffer being
	 *   non-empty and can crash otherwise.
	 *
	 * This means we will effectively linearise small frags, but
	 * we do not needlessly split large buffers into multiple
	 * copies: we tend to give large frags their own buffers, as
	 * before.
	 */
	BUG_ON(size > MAX_BUFFER_OFFSET);
	if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head)
		return true;

	return false;
}
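
/*
 * Worked example for the check above (a sketch; in this driver
 * MAX_BUFFER_OFFSET is PAGE_SIZE, assumed 4096 here): with
 * offset == 4000 and size == 200 the frag would overflow the current
 * buffer. If the buffer already holds data and is not the head, we
 * return true and the caller copies all 200 bytes into a fresh buffer
 * rather than splitting them 96/104 across two grant copies.
 */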

struct netrx_pending_operations {
	unsigned copy_prod, copy_cons;
	unsigned meta_prod, meta_cons;
	struct gnttab_copy *copy;
	struct xenvif_rx_meta *meta;
	int copy_off;
	grant_ref_t copy_gref;
};

static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
						 struct netrx_pending_operations *npo)
{
	struct xenvif_rx_meta *meta;
	struct xen_netif_rx_request *req;

	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);

	meta = npo->meta + npo->meta_prod++;
	meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
	meta->gso_size = 0;
	meta->size = 0;
	meta->id = req->id;

	npo->copy_off = 0;
	npo->copy_gref = req->gref;

	return meta;
}

/*
 * Set up the grant operations for this fragment. If it's a flipping
 * interface, we also set up the unmap request from here.
 */
static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
				 struct netrx_pending_operations *npo,
				 struct page *page, unsigned long size,
				 unsigned long offset, int *head,
				 struct xenvif *foreign_vif,
				 grant_ref_t foreign_gref)
{
	struct gnttab_copy *copy_gop;
	struct xenvif_rx_meta *meta;
	unsigned long bytes;
	int gso_type = XEN_NETIF_GSO_TYPE_NONE;

	/* Data must not cross a page boundary. */
	BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));

	meta = npo->meta + npo->meta_prod - 1;

	/* Skip unused frames from start of page */
	page += offset >> PAGE_SHIFT;
	offset &= ~PAGE_MASK;

	while (size > 0) {
		BUG_ON(offset >= PAGE_SIZE);
		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);

		bytes = PAGE_SIZE - offset;

		if (bytes > size)
			bytes = size;

		if (start_new_rx_buffer(npo->copy_off, bytes, *head)) {
			/*
			 * Netfront requires there to be some data in the head
			 * buffer.
			 */
			BUG_ON(*head);

			meta = get_next_rx_buffer(vif, npo);
		}

		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
			bytes = MAX_BUFFER_OFFSET - npo->copy_off;

		copy_gop = npo->copy + npo->copy_prod++;
		copy_gop->flags = GNTCOPY_dest_gref;
		copy_gop->len = bytes;

		if (foreign_vif) {
			copy_gop->source.domid = foreign_vif->domid;
			copy_gop->source.u.ref = foreign_gref;
			copy_gop->flags |= GNTCOPY_source_gref;
		} else {
			copy_gop->source.domid = DOMID_SELF;
			copy_gop->source.u.gmfn =
				virt_to_mfn(page_address(page));
		}
		copy_gop->source.offset = offset;

		copy_gop->dest.domid = vif->domid;
		copy_gop->dest.offset = npo->copy_off;
		copy_gop->dest.u.ref = npo->copy_gref;

		npo->copy_off += bytes;
		meta->size += bytes;

		offset += bytes;
		size -= bytes;

		/* Next frame */
		if (offset == PAGE_SIZE && size) {
			BUG_ON(!PageCompound(page));
			page++;
			offset = 0;
		}

		/* Leave a gap for the GSO descriptor. */
		if (skb_is_gso(skb)) {
			if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
				gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
			else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
				gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
		}

		if (*head && ((1 << gso_type) & vif->gso_mask))
			vif->rx.req_cons++;

		*head = 0; /* There must be something in this buffer now. */
	}
}

/*
 * Find the grant ref for a given frag in a chain of struct ubuf_info's.
 * skb: the skb itself
 * i: the frag's number
 * ubuf: a pointer to an element in the chain. It should not be NULL.
 *
 * Returns a pointer to the element in the chain where the page was found.
 * If not found, returns NULL.
 * See the definition of callback_struct in common.h for more details about
 * the chain.
 */
static const struct ubuf_info *xenvif_find_gref(const struct sk_buff *const skb,
						const int i,
						const struct ubuf_info *ubuf)
{
	struct xenvif *foreign_vif = ubuf_to_vif(ubuf);

	do {
		u16 pending_idx = ubuf->desc;

		if (skb_shinfo(skb)->frags[i].page.p ==
		    foreign_vif->mmap_pages[pending_idx])
			break;
		ubuf = (struct ubuf_info *) ubuf->ctx;
	} while (ubuf);

	return ubuf;
}
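
/*
 * Illustration of the chain walked above (see callback_struct in
 * common.h): ubuf->ctx points at the next element and ubuf->desc holds
 * that slot's pending_idx, e.g.:
 *
 *   skb destructor_arg -> { desc = 5, ctx } -> { desc = 9, ctx = NULL }
 *
 * Each frag's page is compared against mmap_pages[desc] of the owning
 * vif; a frag backed by a locally allocated page matches no element, so
 * the walk falls off the end of the chain and returns NULL.
 */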

/*
 * Prepare an SKB to be transmitted to the frontend.
 *
 * This function is responsible for allocating grant operations, meta
 * structures, etc.
 *
 * It returns the number of meta structures consumed. The number of
 * ring slots used is always equal to the number of meta slots used
 * plus the number of GSO descriptors used. Currently, we use either
 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
 * frontend-side LRO).
 */
static int xenvif_gop_skb(struct sk_buff *skb,
			  struct netrx_pending_operations *npo)
{
	struct xenvif *vif = netdev_priv(skb->dev);
	int nr_frags = skb_shinfo(skb)->nr_frags;
	int i;
	struct xen_netif_rx_request *req;
	struct xenvif_rx_meta *meta;
	unsigned char *data;
	int head = 1;
	int old_meta_prod;
	int gso_type;
	const struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg;
	const struct ubuf_info *const head_ubuf = ubuf;

	old_meta_prod = npo->meta_prod;

	gso_type = XEN_NETIF_GSO_TYPE_NONE;
	if (skb_is_gso(skb)) {
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
			gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
	}

	/* Set up a GSO prefix descriptor, if necessary */
	if ((1 << gso_type) & vif->gso_prefix_mask) {
		req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
		meta = npo->meta + npo->meta_prod++;
		meta->gso_type = gso_type;
		meta->gso_size = skb_shinfo(skb)->gso_size;
		meta->size = 0;
		meta->id = req->id;
	}

	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
	meta = npo->meta + npo->meta_prod++;

	if ((1 << gso_type) & vif->gso_mask) {
		meta->gso_type = gso_type;
		meta->gso_size = skb_shinfo(skb)->gso_size;
	} else {
		meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
		meta->gso_size = 0;
	}

	meta->size = 0;
	meta->id = req->id;
	npo->copy_off = 0;
	npo->copy_gref = req->gref;

	data = skb->data;
	while (data < skb_tail_pointer(skb)) {
		unsigned int offset = offset_in_page(data);
		unsigned int len = PAGE_SIZE - offset;

		if (data + len > skb_tail_pointer(skb))
			len = skb_tail_pointer(skb) - data;

		xenvif_gop_frag_copy(vif, skb, npo,
				     virt_to_page(data), len, offset, &head,
				     NULL,
				     0);
		data += len;
	}

	for (i = 0; i < nr_frags; i++) {
		/* This variable also signals whether foreign_gref has a real
		 * value or not.
		 */
		struct xenvif *foreign_vif = NULL;
		grant_ref_t foreign_gref;

		if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) &&
			(ubuf->callback == &xenvif_zerocopy_callback)) {
			const struct ubuf_info *const startpoint = ubuf;

			/* Ideally ubuf points to the chain element which
			 * belongs to this frag, or, if frags were removed
			 * from the beginning, to an element shortly before
			 * it.
			 */
			ubuf = xenvif_find_gref(skb, i, ubuf);

			/* Try again from the beginning of the list, if we
			 * haven't tried from there. This only makes sense in
			 * the unlikely event of reordering the original frags.
			 * For injected local pages it's an unnecessary second
			 * run.
			 */
			if (unlikely(!ubuf) && startpoint != head_ubuf)
				ubuf = xenvif_find_gref(skb, i, head_ubuf);

			if (likely(ubuf)) {
				u16 pending_idx = ubuf->desc;

				foreign_vif = ubuf_to_vif(ubuf);
				foreign_gref = foreign_vif->pending_tx_info[pending_idx].req.gref;
				/* Just a safety measure. If this was the last
				 * element on the list, the for loop will
				 * iterate again if a local page was added to
				 * the end. Using head_ubuf here prevents the
				 * second search on the chain, unless the
				 * original frags changed order, but that's
				 * less likely. Either way, ubuf shouldn't be
				 * NULL.
				 */
				ubuf = ubuf->ctx ?
					(struct ubuf_info *) ubuf->ctx :
					head_ubuf;
			} else
				/* This frag was a local page, added to the
				 * array after the skb left netback.
				 */
				ubuf = head_ubuf;
		}
		xenvif_gop_frag_copy(vif, skb, npo,
				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
				     skb_shinfo(skb)->frags[i].page_offset,
				     &head,
				     foreign_vif,
				     foreign_vif ? foreign_gref : UINT_MAX);
	}

	return npo->meta_prod - old_meta_prod;
}

/*
 * This is a twin to xenvif_gop_skb.  Assume that xenvif_gop_skb was
 * used to set up the operations on the top of
 * netrx_pending_operations, which have since been done.  Check that
 * they didn't give any errors and advance over them.
 */
static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
			    struct netrx_pending_operations *npo)
{
	struct gnttab_copy     *copy_op;
	int status = XEN_NETIF_RSP_OKAY;
	int i;

	for (i = 0; i < nr_meta_slots; i++) {
		copy_op = npo->copy + npo->copy_cons++;
		if (copy_op->status != GNTST_okay) {
			netdev_dbg(vif->dev,
				   "Bad status %d from copy to DOM%d.\n",
				   copy_op->status, vif->domid);
			status = XEN_NETIF_RSP_ERROR;
		}
	}

	return status;
}

static void xenvif_add_frag_responses(struct xenvif *vif, int status,
				      struct xenvif_rx_meta *meta,
				      int nr_meta_slots)
{
	int i;
	unsigned long offset;

	/* No fragments used */
	if (nr_meta_slots <= 1)
		return;

	nr_meta_slots--;

	for (i = 0; i < nr_meta_slots; i++) {
		int flags;
		if (i == nr_meta_slots - 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		offset = 0;
		make_rx_response(vif, meta[i].id, status, offset,
				 meta[i].size, flags);
	}
}

struct xenvif_rx_cb {
	int meta_slots_used;
};

#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)

void xenvif_kick_thread(struct xenvif *vif)
{
	wake_up(&vif->wq);
}

static void xenvif_rx_action(struct xenvif *vif)
{
	s8 status;
	u16 flags;
	struct xen_netif_rx_response *resp;
	struct sk_buff_head rxq;
	struct sk_buff *skb;
	LIST_HEAD(notify);
	int ret;
	unsigned long offset;
	bool need_to_notify = false;

	struct netrx_pending_operations npo = {
		.copy  = vif->grant_copy_op,
		.meta  = vif->meta,
	};

	skb_queue_head_init(&rxq);

	while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
		RING_IDX max_slots_needed;
		RING_IDX old_req_cons;
		RING_IDX ring_slots_used;
		int i;

		/* We need a cheap worst-case estimate for the number of
		 * slots we'll use.
		 */

		max_slots_needed = DIV_ROUND_UP(offset_in_page(skb->data) +
						skb_headlen(skb),
						PAGE_SIZE);
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			unsigned int size;
			unsigned int offset;

			size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
			offset = skb_shinfo(skb)->frags[i].page_offset;

			/* For a worst-case estimate we need to factor in
			 * the fragment page offset as this will affect the
			 * number of times xenvif_gop_frag_copy() will
			 * call start_new_rx_buffer().
			 */
			max_slots_needed += DIV_ROUND_UP(offset + size,
							 PAGE_SIZE);
		}
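
		/* Worked example of the estimate (a sketch, assuming a
		 * 4096-byte PAGE_SIZE): a 2000-byte header starting at
		 * page offset 3000 spans two pages, so the headlen term
		 * is DIV_ROUND_UP(3000 + 2000, 4096) == 2; a 5000-byte
		 * frag at offset 1000 likewise adds
		 * DIV_ROUND_UP(1000 + 5000, 4096) == 2, giving 4 slots
		 * before the cap and GSO adjustments below.
		 */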

		/* To avoid the estimate becoming too pessimal for some
		 * frontends that limit posted rx requests, cap the estimate
		 * at MAX_SKB_FRAGS.
		 */
		if (max_slots_needed > MAX_SKB_FRAGS)
			max_slots_needed = MAX_SKB_FRAGS;

		/* We may need one more slot for GSO metadata */
		if (skb_is_gso(skb) &&
		   (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
		    skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
			max_slots_needed++;

		/* If the skb may not fit then bail out now */
		if (!xenvif_rx_ring_slots_available(vif, max_slots_needed)) {
			skb_queue_head(&vif->rx_queue, skb);
			need_to_notify = true;
			vif->rx_last_skb_slots = max_slots_needed;
			break;
		} else
			vif->rx_last_skb_slots = 0;

		old_req_cons = vif->rx.req_cons;
		XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo);
		ring_slots_used = vif->rx.req_cons - old_req_cons;

		BUG_ON(ring_slots_used > max_slots_needed);

		__skb_queue_tail(&rxq, skb);
	}

	BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta));

	if (!npo.copy_prod)
		goto done;

	BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
	gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);

	while ((skb = __skb_dequeue(&rxq)) != NULL) {

		if ((1 << vif->meta[npo.meta_cons].gso_type) &
		    vif->gso_prefix_mask) {
			resp = RING_GET_RESPONSE(&vif->rx,
						 vif->rx.rsp_prod_pvt++);

			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;

			resp->offset = vif->meta[npo.meta_cons].gso_size;
			resp->id = vif->meta[npo.meta_cons].id;
			resp->status = XENVIF_RX_CB(skb)->meta_slots_used;

			npo.meta_cons++;
			XENVIF_RX_CB(skb)->meta_slots_used--;
		}

		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;

		status = xenvif_check_gop(vif,
					  XENVIF_RX_CB(skb)->meta_slots_used,
					  &npo);

		if (XENVIF_RX_CB(skb)->meta_slots_used == 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
			flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
			/* remote but checksummed. */
			flags |= XEN_NETRXF_data_validated;

		offset = 0;
		resp = make_rx_response(vif, vif->meta[npo.meta_cons].id,
					status, offset,
					vif->meta[npo.meta_cons].size,
					flags);

		if ((1 << vif->meta[npo.meta_cons].gso_type) &
		    vif->gso_mask) {
			struct xen_netif_extra_info *gso =
				(struct xen_netif_extra_info *)
				RING_GET_RESPONSE(&vif->rx,
						  vif->rx.rsp_prod_pvt++);

			resp->flags |= XEN_NETRXF_extra_info;

			gso->u.gso.type = vif->meta[npo.meta_cons].gso_type;
			gso->u.gso.size = vif->meta[npo.meta_cons].gso_size;
			gso->u.gso.pad = 0;
			gso->u.gso.features = 0;

			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
			gso->flags = 0;
		}

		xenvif_add_frag_responses(vif, status,
					  vif->meta + npo.meta_cons + 1,
					  XENVIF_RX_CB(skb)->meta_slots_used);

		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);

		need_to_notify |= !!ret;

		npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used;
		dev_kfree_skb(skb);
	}

done:
	if (need_to_notify)
		notify_remote_via_irq(vif->rx_irq);
}

void xenvif_napi_schedule_or_enable_events(struct xenvif *vif)
{
	int more_to_do;

	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);

	if (more_to_do)
		napi_schedule(&vif->napi);
}

static void tx_add_credit(struct xenvif *vif)
{
	unsigned long max_burst, max_credit;

	/*
	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
	 * Otherwise the interface can seize up due to insufficient credit.
	 */
	max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
	max_burst = min(max_burst, 131072UL);
	max_burst = max(max_burst, vif->credit_bytes);

	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
	max_credit = vif->remaining_credit + vif->credit_bytes;
	if (max_credit < vif->remaining_credit)
		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */

	vif->remaining_credit = min(max_credit, max_burst);
}
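
/*
 * Example of the clamping above (a sketch): with credit_bytes == 10000
 * and remaining_credit == 3000, max_credit is 13000. If the request at
 * the ring tail advertises a 60000-byte packet, max_burst is 60000 and
 * the new remaining_credit is min(13000, 60000) == 13000: credit
 * accumulates across replenish periods, up to the burst needed for the
 * pending packet (itself capped at 128kB), rather than being reset to
 * credit_bytes each time.
 */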

static void tx_credit_callback(unsigned long data)
{
	struct xenvif *vif = (struct xenvif *)data;
	tx_add_credit(vif);
	xenvif_napi_schedule_or_enable_events(vif);
}

static void xenvif_tx_err(struct xenvif *vif,
			  struct xen_netif_tx_request *txp, RING_IDX end)
{
	RING_IDX cons = vif->tx.req_cons;
	unsigned long flags;

	do {
		spin_lock_irqsave(&vif->response_lock, flags);
		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
		spin_unlock_irqrestore(&vif->response_lock, flags);
		if (cons == end)
			break;
		txp = RING_GET_REQUEST(&vif->tx, cons++);
	} while (1);
	vif->tx.req_cons = cons;
}

static void xenvif_fatal_tx_err(struct xenvif *vif)
{
	netdev_err(vif->dev, "fatal error; disabling device\n");
	vif->disabled = true;
	xenvif_kick_thread(vif);
}

static int xenvif_count_requests(struct xenvif *vif,
				 struct xen_netif_tx_request *first,
				 struct xen_netif_tx_request *txp,
				 int work_to_do)
{
	RING_IDX cons = vif->tx.req_cons;
	int slots = 0;
	int drop_err = 0;
	int more_data;

	if (!(first->flags & XEN_NETTXF_more_data))
		return 0;

	do {
		struct xen_netif_tx_request dropped_tx = { 0 };

		if (slots >= work_to_do) {
			netdev_err(vif->dev,
				   "Asked for %d slots but exceeds this limit\n",
				   work_to_do);
			xenvif_fatal_tx_err(vif);
			return -ENODATA;
		}

		/* This guest is really using too many slots and
		 * is considered malicious.
		 */
		if (unlikely(slots >= fatal_skb_slots)) {
			netdev_err(vif->dev,
				   "Malicious frontend using %d slots, threshold %u\n",
				   slots, fatal_skb_slots);
			xenvif_fatal_tx_err(vif);
			return -E2BIG;
		}

		/* The Xen network protocol has an implicit dependency on
		 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
		 * the historical MAX_SKB_FRAGS value 18 to honor the
		 * same behavior as before. Any packet using more than
		 * 18 slots but less than fatal_skb_slots slots is
		 * dropped.
		 */
		if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
			if (net_ratelimit())
				netdev_dbg(vif->dev,
					   "Too many slots (%d) exceeding limit (%d), dropping packet\n",
					   slots, XEN_NETBK_LEGACY_SLOTS_MAX);
			drop_err = -E2BIG;
		}

		if (drop_err)
			txp = &dropped_tx;

		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
		       sizeof(*txp));

		/* If the guest submitted a frame >= 64 KiB then
		 * first->size overflowed and following slots will
		 * appear to be larger than the frame.
		 *
		 * This cannot be a fatal error as there are buggy
		 * frontends that do this.
		 *
		 * Consume all slots and drop the packet.
		 */
		if (!drop_err && txp->size > first->size) {
			if (net_ratelimit())
				netdev_dbg(vif->dev,
					   "Invalid tx request, slot size %u > remaining size %u\n",
					   txp->size, first->size);
			drop_err = -EIO;
		}

		first->size -= txp->size;
		slots++;

		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
			netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
				 txp->offset, txp->size);
			xenvif_fatal_tx_err(vif);
			return -EINVAL;
		}

		more_data = txp->flags & XEN_NETTXF_more_data;

		if (!drop_err)
			txp++;

	} while (more_data);

	if (drop_err) {
		xenvif_tx_err(vif, first, cons + slots);
		return drop_err;
	}

	return slots;
}

struct xenvif_tx_cb {
	u16 pending_idx;
};

#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)

static inline void xenvif_tx_create_map_op(struct xenvif *vif,
					  u16 pending_idx,
					  struct xen_netif_tx_request *txp,
					  struct gnttab_map_grant_ref *mop)
{
	vif->pages_to_map[mop-vif->tx_map_ops] = vif->mmap_pages[pending_idx];
	gnttab_set_map_op(mop, idx_to_kaddr(vif, pending_idx),
			  GNTMAP_host_map | GNTMAP_readonly,
			  txp->gref, vif->domid);

	memcpy(&vif->pending_tx_info[pending_idx].req, txp,
	       sizeof(*txp));
}

static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
{
	struct sk_buff *skb =
		alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN,
			  GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(skb == NULL))
		return NULL;

	/* Packets passed to netif_rx() must have some headroom. */
	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);

	/* Initialize it here to avoid later surprises */
	skb_shinfo(skb)->destructor_arg = NULL;

	return skb;
}

static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
							struct sk_buff *skb,
							struct xen_netif_tx_request *txp,
							struct gnttab_map_grant_ref *gop)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	skb_frag_t *frags = shinfo->frags;
	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
	int start;
	pending_ring_idx_t index;
	unsigned int nr_slots, frag_overflow = 0;

	/* At this point shinfo->nr_frags is in fact the number of
	 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
	 */
	if (shinfo->nr_frags > MAX_SKB_FRAGS) {
		frag_overflow = shinfo->nr_frags - MAX_SKB_FRAGS;
		BUG_ON(frag_overflow > MAX_SKB_FRAGS);
		shinfo->nr_frags = MAX_SKB_FRAGS;
	}
	nr_slots = shinfo->nr_frags;

	/* Skip the first skb fragment if it is on the same page as the
	 * header fragment.
	 */
	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

	for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
	     shinfo->nr_frags++, txp++, gop++) {
		index = pending_index(vif->pending_cons++);
		pending_idx = vif->pending_ring[index];
		xenvif_tx_create_map_op(vif, pending_idx, txp, gop);
		frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
	}

	if (frag_overflow) {
		struct sk_buff *nskb = xenvif_alloc_skb(0);
		if (unlikely(nskb == NULL)) {
			if (net_ratelimit())
				netdev_err(vif->dev,
					   "Can't allocate the frag_list skb.\n");
			return NULL;
		}

		shinfo = skb_shinfo(nskb);
		frags = shinfo->frags;

		for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
		     shinfo->nr_frags++, txp++, gop++) {
			index = pending_index(vif->pending_cons++);
			pending_idx = vif->pending_ring[index];
			xenvif_tx_create_map_op(vif, pending_idx, txp, gop);
			frag_set_pending_idx(&frags[shinfo->nr_frags],
					     pending_idx);
		}

		skb_shinfo(skb)->frag_list = nskb;
	}

	return gop;
}

static inline void xenvif_grant_handle_set(struct xenvif *vif,
					   u16 pending_idx,
					   grant_handle_t handle)
{
	if (unlikely(vif->grant_tx_handle[pending_idx] !=
		     NETBACK_INVALID_HANDLE)) {
		netdev_err(vif->dev,
			   "Trying to overwrite active handle! pending_idx: %x\n",
			   pending_idx);
		BUG();
	}
	vif->grant_tx_handle[pending_idx] = handle;
}

static inline void xenvif_grant_handle_reset(struct xenvif *vif,
					     u16 pending_idx)
{
	if (unlikely(vif->grant_tx_handle[pending_idx] ==
		     NETBACK_INVALID_HANDLE)) {
		netdev_err(vif->dev,
			   "Trying to unmap invalid handle! pending_idx: %x\n",
			   pending_idx);
		BUG();
	}
	vif->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
}

static int xenvif_tx_check_gop(struct xenvif *vif,
			       struct sk_buff *skb,
			       struct gnttab_map_grant_ref **gopp_map,
			       struct gnttab_copy **gopp_copy)
{
	struct gnttab_map_grant_ref *gop_map = *gopp_map;
	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i, err;
	struct sk_buff *first_skb = NULL;

	/* Check status of header. Note: the copy op must only be
	 * advanced past the error check, otherwise the message below
	 * would report the status and grant ref of the following
	 * operation.
	 */
	err = (*gopp_copy)->status;
	if (unlikely(err)) {
		if (net_ratelimit())
			netdev_dbg(vif->dev,
				   "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
				   (*gopp_copy)->status,
				   pending_idx,
				   (*gopp_copy)->source.u.ref);
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
	}
	(*gopp_copy)++;

check_frags:
	for (i = 0; i < nr_frags; i++, gop_map++) {
		int j, newerr;

		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);

		/* Check error status: if okay then remember grant handle. */
		newerr = gop_map->status;

		if (likely(!newerr)) {
			xenvif_grant_handle_set(vif,
						pending_idx,
						gop_map->handle);
			/* Had a previous error? Invalidate this fragment. */
			if (unlikely(err))
				xenvif_idx_unmap(vif, pending_idx);
			continue;
		}

		/* Error on this fragment: respond to client with an error. */
		if (net_ratelimit())
			netdev_dbg(vif->dev,
				   "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
				   i,
				   gop_map->status,
				   pending_idx,
				   gop_map->ref);
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);

		/* Not the first error? Preceding frags already invalidated. */
		if (err)
			continue;
		/* First error: invalidate preceding fragments. */
		for (j = 0; j < i; j++) {
			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
			xenvif_idx_unmap(vif, pending_idx);
		}

		/* Remember the error: invalidate all subsequent fragments. */
		err = newerr;
	}

	if (skb_has_frag_list(skb)) {
		first_skb = skb;
		skb = shinfo->frag_list;
		shinfo = skb_shinfo(skb);
		nr_frags = shinfo->nr_frags;

		goto check_frags;
	}

	/* There was a mapping error in the frag_list skb. We have to unmap
	 * the first skb's frags.
	 */
	if (first_skb && err) {
		int j;
		shinfo = skb_shinfo(first_skb);
		for (j = 0; j < shinfo->nr_frags; j++) {
			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
			xenvif_idx_unmap(vif, pending_idx);
		}
	}

	*gopp_map = gop_map;
	return err;
}

static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i;
	u16 prev_pending_idx = INVALID_PENDING_IDX;

	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = shinfo->frags + i;
		struct xen_netif_tx_request *txp;
		struct page *page;
		u16 pending_idx;

		pending_idx = frag_get_pending_idx(frag);

		/* If this is not the first frag, chain it to the previous
		 * one.
		 */
		if (prev_pending_idx == INVALID_PENDING_IDX)
			skb_shinfo(skb)->destructor_arg =
				&callback_param(vif, pending_idx);
		else
			callback_param(vif, prev_pending_idx).ctx =
				&callback_param(vif, pending_idx);

		callback_param(vif, pending_idx).ctx = NULL;
		prev_pending_idx = pending_idx;

		txp = &vif->pending_tx_info[pending_idx].req;
		page = virt_to_page(idx_to_kaddr(vif, pending_idx));
		__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
		skb->len += txp->size;
		skb->data_len += txp->size;
		skb->truesize += txp->size;

		/* Take an extra reference to offset network stack's put_page */
		get_page(vif->mmap_pages[pending_idx]);
	}
	/* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc
	 * overlaps with "index", and "mapping" is not set. I think mapping
	 * should be set. If delivered to local stack, it would drop this
	 * skb in sk_filter unless the socket has the right to use it.
	 */
	skb->pfmemalloc	= false;
}

static int xenvif_get_extras(struct xenvif *vif,
				struct xen_netif_extra_info *extras,
				int work_to_do)
{
	struct xen_netif_extra_info extra;
	RING_IDX cons = vif->tx.req_cons;

	do {
		if (unlikely(work_to_do-- <= 0)) {
			netdev_err(vif->dev, "Missing extra info\n");
			xenvif_fatal_tx_err(vif);
			return -EBADR;
		}

		memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
		       sizeof(extra));
		if (unlikely(!extra.type ||
			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			vif->tx.req_cons = ++cons;
			netdev_err(vif->dev,
				   "Invalid extra type: %d\n", extra.type);
			xenvif_fatal_tx_err(vif);
			return -EINVAL;
		}

		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
		vif->tx.req_cons = ++cons;
	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return work_to_do;
}

static int xenvif_set_skb_gso(struct xenvif *vif,
			      struct sk_buff *skb,
			      struct xen_netif_extra_info *gso)
{
	if (!gso->u.gso.size) {
		netdev_err(vif->dev, "GSO size must not be zero.\n");
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	switch (gso->u.gso.type) {
	case XEN_NETIF_GSO_TYPE_TCPV4:
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
		break;
	case XEN_NETIF_GSO_TYPE_TCPV6:
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	skb_shinfo(skb)->gso_size = gso->u.gso.size;
	/* gso_segs will be calculated later */

	return 0;
}

static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
{
	bool recalculate_partial_csum = false;

	/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
	 * peers can fail to set NETRXF_csum_blank when sending a GSO
	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
	 * recalculate the partial checksum.
	 */
	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
		vif->rx_gso_checksum_fixup++;
		skb->ip_summed = CHECKSUM_PARTIAL;
		recalculate_partial_csum = true;
	}

	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	return skb_checksum_setup(skb, recalculate_partial_csum);
}

static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
{
	u64 now = get_jiffies_64();
	u64 next_credit = vif->credit_window_start +
		msecs_to_jiffies(vif->credit_usec / 1000);

	/* Timer could already be pending in rare cases. */
	if (timer_pending(&vif->credit_timeout))
		return true;

	/* Passed the point where we can replenish credit? */
	if (time_after_eq64(now, next_credit)) {
		vif->credit_window_start = now;
		tx_add_credit(vif);
	}

	/* Still too big to send right now? Set a callback. */
	if (size > vif->remaining_credit) {
		vif->credit_timeout.data     =
			(unsigned long)vif;
		vif->credit_timeout.function =
			tx_credit_callback;
		mod_timer(&vif->credit_timeout,
			  next_credit);
		vif->credit_window_start = next_credit;

		return true;
	}

	return false;
}
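
/*
 * Timing sketch for the above: with credit_usec == 500000 the replenish
 * window is 500 ms. If a 9000-byte request arrives 200 ms into the
 * window with only 1500 bytes of credit left, the timer is not pending
 * and the window has not elapsed, so the size check arms the timer for
 * the remaining 300 ms and the request waits; tx_credit_callback() then
 * tops up the credit and re-schedules NAPI.
 */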

static void xenvif_tx_build_gops(struct xenvif *vif,
				     int budget,
				     unsigned *copy_ops,
				     unsigned *map_ops)
{
	struct gnttab_map_grant_ref *gop = vif->tx_map_ops, *request_gop;
	struct sk_buff *skb;
	int ret;

	while (skb_queue_len(&vif->tx_queue) < budget) {
		struct xen_netif_tx_request txreq;
		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
		u16 pending_idx;
		RING_IDX idx;
		int work_to_do;
		unsigned int data_len;
		pending_ring_idx_t index;

		if (vif->tx.sring->req_prod - vif->tx.req_cons >
		    XEN_NETIF_TX_RING_SIZE) {
			netdev_err(vif->dev,
				   "Impossible number of requests. "
				   "req_prod %d, req_cons %d, size %ld\n",
				   vif->tx.sring->req_prod, vif->tx.req_cons,
				   XEN_NETIF_TX_RING_SIZE);
			xenvif_fatal_tx_err(vif);
			break;
		}

		work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx);
		if (!work_to_do)
			break;

		idx = vif->tx.req_cons;
		rmb(); /* Ensure that we see the request before we copy it. */
		memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));

		/* Credit-based scheduling. */
		if (txreq.size > vif->remaining_credit &&
		    tx_credit_exceeded(vif, txreq.size))
			break;

		vif->remaining_credit -= txreq.size;

		work_to_do--;
		vif->tx.req_cons = ++idx;

		memset(extras, 0, sizeof(extras));
		if (txreq.flags & XEN_NETTXF_extra_info) {
			work_to_do = xenvif_get_extras(vif, extras,
						       work_to_do);
			idx = vif->tx.req_cons;
			if (unlikely(work_to_do < 0))
				break;
		}

		ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do);
		if (unlikely(ret < 0))
			break;

		idx += ret;

		if (unlikely(txreq.size < ETH_HLEN)) {
			netdev_dbg(vif->dev,
				   "Bad packet size: %d\n", txreq.size);
			xenvif_tx_err(vif, &txreq, idx);
			break;
		}

		/* The payload must not cross a page boundary, as it must not
		 * be fragmented.
		 */
		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
			netdev_err(vif->dev,
				   "txreq.offset: %x, size: %u, end: %lu\n",
				   txreq.offset, txreq.size,
				   (txreq.offset&~PAGE_MASK) + txreq.size);
			xenvif_fatal_tx_err(vif);
			break;
		}

		index = pending_index(vif->pending_cons);
		pending_idx = vif->pending_ring[index];

		data_len = (txreq.size > PKT_PROT_LEN &&
			    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
			PKT_PROT_LEN : txreq.size;

		skb = xenvif_alloc_skb(data_len);
		if (unlikely(skb == NULL)) {
			netdev_dbg(vif->dev,
				   "Can't allocate a skb in start_xmit.\n");
			xenvif_tx_err(vif, &txreq, idx);
			break;
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
			struct xen_netif_extra_info *gso;
			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

			if (xenvif_set_skb_gso(vif, skb, gso)) {
				/* Failure in xenvif_set_skb_gso is fatal. */
				kfree_skb(skb);
				break;
			}
		}

		XENVIF_TX_CB(skb)->pending_idx = pending_idx;

		__skb_put(skb, data_len);
		vif->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
		vif->tx_copy_ops[*copy_ops].source.domid = vif->domid;
		vif->tx_copy_ops[*copy_ops].source.offset = txreq.offset;

		vif->tx_copy_ops[*copy_ops].dest.u.gmfn =
			virt_to_mfn(skb->data);
		vif->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
		vif->tx_copy_ops[*copy_ops].dest.offset =
			offset_in_page(skb->data);

		vif->tx_copy_ops[*copy_ops].len = data_len;
		vif->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;

		(*copy_ops)++;

		skb_shinfo(skb)->nr_frags = ret;
		if (data_len < txreq.size) {
			skb_shinfo(skb)->nr_frags++;
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     pending_idx);
			xenvif_tx_create_map_op(vif, pending_idx, &txreq, gop);
			gop++;
		} else {
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     INVALID_PENDING_IDX);
			memcpy(&vif->pending_tx_info[pending_idx].req, &txreq,
			       sizeof(txreq));
		}

		vif->pending_cons++;

		request_gop = xenvif_get_requests(vif, skb, txfrags, gop);
		if (request_gop == NULL) {
			kfree_skb(skb);
			xenvif_tx_err(vif, &txreq, idx);
			break;
		}
		gop = request_gop;

		__skb_queue_tail(&vif->tx_queue, skb);

		vif->tx.req_cons = idx;

		if (((gop-vif->tx_map_ops) >= ARRAY_SIZE(vif->tx_map_ops)) ||
		    (*copy_ops >= ARRAY_SIZE(vif->tx_copy_ops)))
			break;
	}

	(*map_ops) = gop - vif->tx_map_ops;
	return;
}

/* Consolidate an skb with a frag_list into a brand new one with local pages
 * on frags. Returns 0, or -ENOMEM if it can't allocate new pages.
 */
static int xenvif_handle_frag_list(struct xenvif *vif, struct sk_buff *skb)
{
	unsigned int offset = skb_headlen(skb);
	skb_frag_t frags[MAX_SKB_FRAGS];
	int i;
	struct ubuf_info *uarg;
	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;

	vif->tx_zerocopy_sent += 2;
	vif->tx_frag_overflow++;

	xenvif_fill_frags(vif, nskb);
	/* Subtract frags size, we will correct it later */
	skb->truesize -= skb->data_len;
	skb->len += nskb->len;
	skb->data_len += nskb->len;

	/* create a brand new frags array and coalesce there */
	for (i = 0; offset < skb->len; i++) {
		struct page *page;
		unsigned int len;

		BUG_ON(i >= MAX_SKB_FRAGS);
		page = alloc_page(GFP_ATOMIC|__GFP_COLD);
		if (!page) {
			int j;
			skb->truesize += skb->data_len;
			for (j = 0; j < i; j++)
				put_page(frags[j].page.p);
			return -ENOMEM;
		}

		if (offset + PAGE_SIZE < skb->len)
			len = PAGE_SIZE;
		else
			len = skb->len - offset;
		if (skb_copy_bits(skb, offset, page_address(page), len))
			BUG();

		offset += len;
		frags[i].page.p = page;
		frags[i].page_offset = 0;
		skb_frag_size_set(&frags[i], len);
	}
	/* swap out with the old one */
	memcpy(skb_shinfo(skb)->frags,
	       frags,
	       i * sizeof(skb_frag_t));
	skb_shinfo(skb)->nr_frags = i;
	skb->truesize += i * PAGE_SIZE;

	/* remove traces of mapped pages and frag_list */
	skb_frag_list_init(skb);
	uarg = skb_shinfo(skb)->destructor_arg;
	uarg->callback(uarg, true);
	skb_shinfo(skb)->destructor_arg = NULL;

	skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
	kfree_skb(nskb);

	return 0;
}

static int xenvif_tx_submit(struct xenvif *vif)
{
	struct gnttab_map_grant_ref *gop_map = vif->tx_map_ops;
	struct gnttab_copy *gop_copy = vif->tx_copy_ops;
	struct sk_buff *skb;
	int work_done = 0;

	while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
		struct xen_netif_tx_request *txp;
		u16 pending_idx;
		unsigned data_len;

		pending_idx = XENVIF_TX_CB(skb)->pending_idx;
		txp = &vif->pending_tx_info[pending_idx].req;

		/* Check the remap error code. */
		if (unlikely(xenvif_tx_check_gop(vif, skb, &gop_map, &gop_copy))) {
			skb_shinfo(skb)->nr_frags = 0;
			kfree_skb(skb);
			continue;
		}

		data_len = skb->len;
		callback_param(vif, pending_idx).ctx = NULL;
		if (data_len < txp->size) {
			/* Append the packet payload as a fragment. */
			txp->offset += data_len;
			txp->size -= data_len;
		} else {
			/* Schedule a response immediately. */
			xenvif_idx_release(vif, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		if (txp->flags & XEN_NETTXF_csum_blank)
			skb->ip_summed = CHECKSUM_PARTIAL;
		else if (txp->flags & XEN_NETTXF_data_validated)
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		xenvif_fill_frags(vif, skb);

		if (unlikely(skb_has_frag_list(skb))) {
			if (xenvif_handle_frag_list(vif, skb)) {
				if (net_ratelimit())
					netdev_err(vif->dev,
						   "Not enough memory to consolidate frag_list!\n");
				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
				kfree_skb(skb);
				continue;
			}
		}

		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
			int target = min_t(int, skb->len, PKT_PROT_LEN);
			__pskb_pull_tail(skb, target - skb_headlen(skb));
		}

		skb->dev      = vif->dev;
		skb->protocol = eth_type_trans(skb, skb->dev);
		skb_reset_network_header(skb);

		if (checksum_setup(vif, skb)) {
			netdev_dbg(vif->dev,
				   "Can't setup checksum in net_tx_action\n");
			/* We have to set this flag to trigger the callback */
			if (skb_shinfo(skb)->destructor_arg)
				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
			kfree_skb(skb);
			continue;
		}

		skb_probe_transport_header(skb, 0);

		/* If the packet is GSO then we will have just set up the
		 * transport header offset in checksum_setup so it's now
		 * straightforward to calculate gso_segs.
		 */
		if (skb_is_gso(skb)) {
			int mss = skb_shinfo(skb)->gso_size;
			int hdrlen = skb_transport_header(skb) -
				skb_mac_header(skb) +
				tcp_hdrlen(skb);

			skb_shinfo(skb)->gso_segs =
				DIV_ROUND_UP(skb->len - hdrlen, mss);
		}
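
		/* For example (a sketch): an IPv4 TCP skb with
		 * skb->len == 4394, a 14-byte Ethernet header, 20
		 * bytes of IP, 20 bytes of TCP and mss == 1448 gives
		 * hdrlen == 54 and
		 * gso_segs == DIV_ROUND_UP(4340, 1448) == 3.
		 */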

		vif->dev->stats.rx_bytes += skb->len;
		vif->dev->stats.rx_packets++;

		work_done++;

		/* Set this flag right before netif_receive_skb, otherwise
		 * someone might think this packet already left netback, and
		 * do a skb_copy_ubufs while we are still in control of the
		 * skb. E.g. the __pskb_pull_tail earlier can do such a thing.
		 */
		if (skb_shinfo(skb)->destructor_arg) {
			skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
			vif->tx_zerocopy_sent++;
		}

		netif_receive_skb(skb);
	}

	return work_done;
}

void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
{
	unsigned long flags;
	pending_ring_idx_t index;
	struct xenvif *vif = ubuf_to_vif(ubuf);

	/* This is the only place where we grab this lock, to protect callbacks
	 * from each other.
	 */
	spin_lock_irqsave(&vif->callback_lock, flags);
	do {
		u16 pending_idx = ubuf->desc;
		ubuf = (struct ubuf_info *) ubuf->ctx;
		BUG_ON(vif->dealloc_prod - vif->dealloc_cons >=
			MAX_PENDING_REQS);
		index = pending_index(vif->dealloc_prod);
		vif->dealloc_ring[index] = pending_idx;
		/* Sync with xenvif_tx_dealloc_action:
		 * insert idx then incr producer.
		 */
		smp_wmb();
		vif->dealloc_prod++;
	} while (ubuf);
	wake_up(&vif->dealloc_wq);
	spin_unlock_irqrestore(&vif->callback_lock, flags);

	if (likely(zerocopy_success))
		vif->tx_zerocopy_success++;
	else
		vif->tx_zerocopy_fail++;
}

static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
{
	struct gnttab_unmap_grant_ref *gop;
	pending_ring_idx_t dc, dp;
	u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
	unsigned int i = 0;

	dc = vif->dealloc_cons;
	gop = vif->tx_unmap_ops;

	/* Free up any grants we have finished using */
	do {
		dp = vif->dealloc_prod;

		/* Ensure we see all indices enqueued by every invocation of
		 * xenvif_zerocopy_callback().
		 */
		smp_rmb();

		while (dc != dp) {
			BUG_ON(gop - vif->tx_unmap_ops > MAX_PENDING_REQS);
			pending_idx =
				vif->dealloc_ring[pending_index(dc++)];

			pending_idx_release[gop-vif->tx_unmap_ops] =
				pending_idx;
			vif->pages_to_unmap[gop-vif->tx_unmap_ops] =
				vif->mmap_pages[pending_idx];
			gnttab_set_unmap_op(gop,
					    idx_to_kaddr(vif, pending_idx),
					    GNTMAP_host_map,
					    vif->grant_tx_handle[pending_idx]);
			xenvif_grant_handle_reset(vif, pending_idx);
			++gop;
		}

	} while (dp != vif->dealloc_prod);

	vif->dealloc_cons = dc;

	if (gop - vif->tx_unmap_ops > 0) {
		int ret;
		ret = gnttab_unmap_refs(vif->tx_unmap_ops,
					NULL,
					vif->pages_to_unmap,
					gop - vif->tx_unmap_ops);
		if (ret) {
			netdev_err(vif->dev, "Unmap fail: nr_ops %tx ret %d\n",
				   gop - vif->tx_unmap_ops, ret);
			for (i = 0; i < gop - vif->tx_unmap_ops; ++i) {
				if (gop[i].status != GNTST_okay)
					netdev_err(vif->dev,
						   " host_addr: %llx handle: %x status: %d\n",
						   gop[i].host_addr,
						   gop[i].handle,
						   gop[i].status);
			}
			BUG();
		}
	}

	for (i = 0; i < gop - vif->tx_unmap_ops; ++i)
		xenvif_idx_release(vif, pending_idx_release[i],
				   XEN_NETIF_RSP_OKAY);
}
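
/*
 * The dealloc ring consumed above is a single-producer/single-consumer
 * pair of free-running counters: xenvif_zerocopy_callback() (serialised
 * by callback_lock) produces, this function consumes. The smp_wmb() in
 * the callback pairs with the smp_rmb() here, so every pending_idx
 * written before dealloc_prod was advanced is visible before it is
 * consumed.
 */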

/* Called after netfront has transmitted */
int xenvif_tx_action(struct xenvif *vif, int budget)
{
	unsigned nr_mops, nr_cops = 0;
	int work_done, ret;

	if (unlikely(!tx_work_todo(vif)))
		return 0;

	xenvif_tx_build_gops(vif, budget, &nr_cops, &nr_mops);

	if (nr_cops == 0)
		return 0;

	gnttab_batch_copy(vif->tx_copy_ops, nr_cops);
	if (nr_mops != 0) {
		ret = gnttab_map_refs(vif->tx_map_ops,
				      NULL,
				      vif->pages_to_map,
				      nr_mops);
		BUG_ON(ret);
	}

	work_done = xenvif_tx_submit(vif);

	return work_done;
}

static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
			       u8 status)
{
	struct pending_tx_info *pending_tx_info;
	pending_ring_idx_t index;
	unsigned long flags;

	pending_tx_info = &vif->pending_tx_info[pending_idx];
	spin_lock_irqsave(&vif->response_lock, flags);
	make_tx_response(vif, &pending_tx_info->req, status);
	index = pending_index(vif->pending_prod);
	vif->pending_ring[index] = pending_idx;
	/* TX shouldn't use the index before we give it back here */
	mb();
	vif->pending_prod++;
	spin_unlock_irqrestore(&vif->response_lock, flags);
}

static void make_tx_response(struct xenvif *vif,
			     struct xen_netif_tx_request *txp,
			     s8       st)
{
	RING_IDX i = vif->tx.rsp_prod_pvt;
	struct xen_netif_tx_response *resp;
	int notify;

	resp = RING_GET_RESPONSE(&vif->tx, i);
	resp->id     = txp->id;
	resp->status = st;

	if (txp->flags & XEN_NETTXF_extra_info)
		RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;

	vif->tx.rsp_prod_pvt = ++i;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
	if (notify)
		notify_remote_via_irq(vif->tx_irq);
}

static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
					     u16      id,
					     s8       st,
					     u16      offset,
					     u16      size,
					     u16      flags)
{
	RING_IDX i = vif->rx.rsp_prod_pvt;
	struct xen_netif_rx_response *resp;

	resp = RING_GET_RESPONSE(&vif->rx, i);
	resp->offset     = offset;
	resp->flags      = flags;
	resp->id         = id;
	resp->status     = (s16)size;
	if (st < 0)
		resp->status = (s16)st;

	vif->rx.rsp_prod_pvt = ++i;

	return resp;
}
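
/*
 * Note on the encoding above: the shared-ring rx response has no
 * separate length field, so a non-negative status carries the byte
 * count for the buffer while a negative value carries one of the
 * XEN_NETIF_RSP_* error codes; the frontend disambiguates on the sign.
 */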

void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx)
{
	int ret;
	struct gnttab_unmap_grant_ref tx_unmap_op;

	gnttab_set_unmap_op(&tx_unmap_op,
			    idx_to_kaddr(vif, pending_idx),
			    GNTMAP_host_map,
			    vif->grant_tx_handle[pending_idx]);
	xenvif_grant_handle_reset(vif, pending_idx);

	ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
				&vif->mmap_pages[pending_idx], 1);
	if (ret) {
		netdev_err(vif->dev,
			   "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: %x status: %d\n",
			   ret,
			   pending_idx,
			   tx_unmap_op.host_addr,
			   tx_unmap_op.handle,
			   tx_unmap_op.status);
		BUG();
	}

	xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
}

static inline int rx_work_todo(struct xenvif *vif)
{
	return (!skb_queue_empty(&vif->rx_queue) &&
	       xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots)) ||
	       vif->rx_queue_purge;
}

static inline int tx_work_todo(struct xenvif *vif)
{
	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)))
		return 1;

	return 0;
}

static inline bool tx_dealloc_work_todo(struct xenvif *vif)
{
	return vif->dealloc_cons != vif->dealloc_prod;
}

void xenvif_unmap_frontend_rings(struct xenvif *vif)
{
	if (vif->tx.sring)
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
					vif->tx.sring);
	if (vif->rx.sring)
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
					vif->rx.sring);
}

int xenvif_map_frontend_rings(struct xenvif *vif,
			      grant_ref_t tx_ring_ref,
			      grant_ref_t rx_ring_ref)
{
	void *addr;
	struct xen_netif_tx_sring *txs;
	struct xen_netif_rx_sring *rxs;

	int err = -ENOMEM;

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
				     tx_ring_ref, &addr);
	if (err)
		goto err;

	txs = (struct xen_netif_tx_sring *)addr;
	BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
				     rx_ring_ref, &addr);
	if (err)
		goto err;

	rxs = (struct xen_netif_rx_sring *)addr;
	BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);

	return 0;

err:
	xenvif_unmap_frontend_rings(vif);
	return err;
}

void xenvif_stop_queue(struct xenvif *vif)
{
	if (!vif->can_queue)
		return;

	netif_stop_queue(vif->dev);
}

static void xenvif_start_queue(struct xenvif *vif)
{
	if (xenvif_schedulable(vif))
		netif_wake_queue(vif->dev);
}

int xenvif_kthread_guest_rx(void *data)
{
	struct xenvif *vif = data;
	struct sk_buff *skb;

	while (!kthread_should_stop()) {
		wait_event_interruptible(vif->wq,
					 rx_work_todo(vif) ||
					 vif->disabled ||
					 kthread_should_stop());

		/* This frontend has been found to be rogue; disable it in
		 * kthread context. Currently this is only set when netback
		 * finds out the frontend sent a malformed packet, but we
		 * cannot disable the interface in softirq context, so we
		 * defer it here.
		 */
		if (unlikely(vif->disabled && netif_carrier_ok(vif->dev)))
			xenvif_carrier_off(vif);

		if (kthread_should_stop())
			break;

		if (vif->rx_queue_purge) {
			skb_queue_purge(&vif->rx_queue);
			vif->rx_queue_purge = false;
		}

		if (!skb_queue_empty(&vif->rx_queue))
			xenvif_rx_action(vif);

		if (skb_queue_empty(&vif->rx_queue) &&
		    netif_queue_stopped(vif->dev)) {
			del_timer_sync(&vif->wake_queue);
			xenvif_start_queue(vif);
		}

		cond_resched();
	}

	/* Bin any remaining skbs */
	while ((skb = skb_dequeue(&vif->rx_queue)) != NULL)
		dev_kfree_skb(skb);

	return 0;
}

int xenvif_dealloc_kthread(void *data)
{
	struct xenvif *vif = data;

	while (!kthread_should_stop()) {
		wait_event_interruptible(vif->dealloc_wq,
					 tx_dealloc_work_todo(vif) ||
					 kthread_should_stop());
		if (kthread_should_stop())
			break;

		xenvif_tx_dealloc_action(vif);
		cond_resched();
	}

	/* Unmap anything remaining */
	if (tx_dealloc_work_todo(vif))
		xenvif_tx_dealloc_action(vif);

	return 0;
}

static int __init netback_init(void)
{
	int rc = 0;

	if (!xen_domain())
		return -ENODEV;

	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
		pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
			fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
		fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
	}

	rc = xenvif_xenbus_init();
	if (rc)
		goto failed_init;

	rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);

	return 0;

failed_init:
	return rc;
}

module_init(netback_init);

static void __exit netback_fini(void)
{
	xenvif_xenbus_fini();
}
module_exit(netback_fini);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vif");