1 /*
2  * Back-end of the driver for virtual network devices. This portion of the
3  * driver exports a 'unified' network-device interface that can be accessed
4  * by any operating system that implements a compatible front end. A
5  * reference front-end implementation can be found in:
6  *  drivers/net/xen-netfront.c
7  *
8  * Copyright (c) 2002-2005, K A Fraser
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License version 2
12  * as published by the Free Software Foundation; or, when distributed
13  * separately from the Linux kernel or incorporated into other
14  * software packages, subject to the following license:
15  *
16  * Permission is hereby granted, free of charge, to any person obtaining a copy
17  * of this source file (the "Software"), to deal in the Software without
18  * restriction, including without limitation the rights to use, copy, modify,
19  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
20  * and to permit persons to whom the Software is furnished to do so, subject to
21  * the following conditions:
22  *
23  * The above copyright notice and this permission notice shall be included in
24  * all copies or substantial portions of the Software.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32  * IN THE SOFTWARE.
33  */
34 
35 #include "common.h"
36 
37 #include <linux/kthread.h>
38 #include <linux/if_vlan.h>
39 #include <linux/udp.h>
40 #include <linux/highmem.h>
41 
42 #include <net/tcp.h>
43 
44 #include <xen/xen.h>
45 #include <xen/events.h>
46 #include <xen/interface/memory.h>
47 
48 #include <asm/xen/hypercall.h>
49 #include <asm/xen/page.h>
50 
/* Provide an option to disable split event channels at load time as
 * event channels are a limited resource. Split event channels are
 * enabled by default.
 */
bool separate_tx_rx_irq = true;
56 module_param(separate_tx_rx_irq, bool, 0644);
57 
/* When the guest ring is filled up, the qdisc queues the packets for us, but
 * we have to time them out, otherwise other guests' packets can get stuck
 * there.
 */
61 unsigned int rx_drain_timeout_msecs = 10000;
62 module_param(rx_drain_timeout_msecs, uint, 0444);
63 unsigned int rx_drain_timeout_jiffies;
64 
65 unsigned int xenvif_max_queues;
66 module_param_named(max_queues, xenvif_max_queues, uint, 0644);
67 MODULE_PARM_DESC(max_queues,
68 		 "Maximum number of queues per virtual interface");
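/* A minimal usage sketch for the parameters above (assuming the driver is
 * built as the "xen-netback" module; that name is an assumption here):
 *
 *   modprobe xen-netback separate_tx_rx_irq=0 max_queues=4
 *
 * separate_tx_rx_irq and max_queues (mode 0644) can also be changed later via
 * /sys/module/<module>/parameters/, while rx_drain_timeout_msecs (mode 0444)
 * can only be set at load time.
 */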
69 
/*
 * This is the maximum number of slots an skb can have. If a guest sends
 * an skb which exceeds this limit it is considered malicious.
 */
74 #define FATAL_SKB_SLOTS_DEFAULT 20
75 static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
76 module_param(fatal_skb_slots, uint, 0444);
77 
78 static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
79 			       u8 status);
80 
81 static void make_tx_response(struct xenvif_queue *queue,
82 			     struct xen_netif_tx_request *txp,
83 			     s8       st);
84 
85 static inline int tx_work_todo(struct xenvif_queue *queue);
86 static inline int rx_work_todo(struct xenvif_queue *queue);
87 
88 static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
89 					     u16      id,
90 					     s8       st,
91 					     u16      offset,
92 					     u16      size,
93 					     u16      flags);
94 
95 static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
96 				       u16 idx)
97 {
98 	return page_to_pfn(queue->mmap_pages[idx]);
99 }
100 
101 static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
102 					 u16 idx)
103 {
104 	return (unsigned long)pfn_to_kaddr(idx_to_pfn(queue, idx));
105 }
106 
107 #define callback_param(vif, pending_idx) \
108 	(vif->pending_tx_info[pending_idx].callback_struct)
109 
/* Find the containing queue's structure from a pointer in the
 * pending_tx_info array.
 */
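/* A worked example of the pointer arithmetic below: the callback_struct
 * (ubuf) is embedded in struct pending_tx_info, and pending_tx_info[] is an
 * array inside struct xenvif_queue indexed by pending_idx. So for
 * ubuf->desc == 5, the first container_of() yields
 * &queue->pending_tx_info[5]; subtracting pending_idx steps back to
 * &queue->pending_tx_info[0], and the second container_of() recovers the
 * queue itself.
 */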
112 static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
113 {
114 	u16 pending_idx = ubuf->desc;
115 	struct pending_tx_info *temp =
116 		container_of(ubuf, struct pending_tx_info, callback_struct);
117 	return container_of(temp - pending_idx,
118 			    struct xenvif_queue,
119 			    pending_tx_info[0]);
120 }
121 
/* This is a minimum size for the linear area to avoid lots of
 * calls to __pskb_pull_tail() as we set up checksum offsets. The
 * value 128 was chosen as it covers all IPv4 and most likely
 * IPv6 headers.
 */
127 #define PKT_PROT_LEN 128
128 
129 static u16 frag_get_pending_idx(skb_frag_t *frag)
130 {
131 	return (u16)frag->page_offset;
132 }
133 
134 static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
135 {
136 	frag->page_offset = pending_idx;
137 }
138 
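/* Pending ring indices are free-running; the mask below maps them into the
 * fixed-size pending array and relies on MAX_PENDING_REQS being a power of
 * two. For illustration, if MAX_PENDING_REQS is 256, pending_index(258) == 2.
 */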
139 static inline pending_ring_idx_t pending_index(unsigned i)
140 {
141 	return i & (MAX_PENDING_REQS-1);
142 }
143 
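/* The re-check loop below avoids a missed wakeup: req_event is advanced to
 * prod + 1 before prod is read again, so a request the frontend posts in the
 * meantime is noticed. For instance, with prod == 10 and cons == 7, asking
 * for 3 slots succeeds immediately, while asking for 4 arms req_event at 11
 * and returns false unless the frontend has pushed more requests by the time
 * prod is re-read.
 */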
144 bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed)
145 {
146 	RING_IDX prod, cons;
147 
148 	do {
149 		prod = queue->rx.sring->req_prod;
150 		cons = queue->rx.req_cons;
151 
152 		if (prod - cons >= needed)
153 			return true;
154 
155 		queue->rx.sring->req_event = prod + 1;
156 
157 		/* Make sure event is visible before we check prod
158 		 * again.
159 		 */
160 		mb();
161 	} while (queue->rx.sring->req_prod != prod);
162 
163 	return false;
164 }
165 
166 /*
167  * Returns true if we should start a new receive buffer instead of
168  * adding 'size' bytes to a buffer which currently contains 'offset'
169  * bytes.
170  */
171 static bool start_new_rx_buffer(int offset, unsigned long size, int head,
172 				bool full_coalesce)
173 {
174 	/* simple case: we have completely filled the current buffer. */
175 	if (offset == MAX_BUFFER_OFFSET)
176 		return true;
177 
178 	/*
179 	 * complex case: start a fresh buffer if the current frag
180 	 * would overflow the current buffer but only if:
181 	 *     (i)   this frag would fit completely in the next buffer
182 	 * and (ii)  there is already some data in the current buffer
183 	 * and (iii) this is not the head buffer.
184 	 * and (iv)  there is no need to fully utilize the buffers
185 	 *
186 	 * Where:
187 	 * - (i) stops us splitting a frag into two copies
188 	 *   unless the frag is too large for a single buffer.
189 	 * - (ii) stops us from leaving a buffer pointlessly empty.
190 	 * - (iii) stops us leaving the first buffer
191 	 *   empty. Strictly speaking this is already covered
192 	 *   by (ii) but is explicitly checked because
193 	 *   netfront relies on the first buffer being
194 	 *   non-empty and can crash otherwise.
	 * - (iv) is needed for skbs which can use up more than MAX_SKB_FRAGS
	 *   slots.
	 *
	 * This means we will effectively linearise small
	 * frags but will not needlessly split large buffers
	 * into multiple copies, tending to give large frags
	 * their own buffers as before.
202 	 */
203 	BUG_ON(size > MAX_BUFFER_OFFSET);
204 	if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head &&
205 	    !full_coalesce)
206 		return true;
207 
208 	return false;
209 }
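/* An illustration of the rule above, assuming MAX_BUFFER_OFFSET is a single
 * 4096-byte page: with copy_off == 3000, a 2000-byte chunk that is neither
 * the head nor part of a fully-coalesced skb starts a fresh buffer, because
 * 3000 + 2000 > 4096 and the chunk would fit in an empty buffer. The same
 * chunk arriving at copy_off == 0 is simply appended.
 */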
210 
211 struct netrx_pending_operations {
212 	unsigned copy_prod, copy_cons;
213 	unsigned meta_prod, meta_cons;
214 	struct gnttab_copy *copy;
215 	struct xenvif_rx_meta *meta;
216 	int copy_off;
217 	grant_ref_t copy_gref;
218 };
219 
220 static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue,
221 						 struct netrx_pending_operations *npo)
222 {
223 	struct xenvif_rx_meta *meta;
224 	struct xen_netif_rx_request *req;
225 
226 	req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
227 
228 	meta = npo->meta + npo->meta_prod++;
229 	meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
230 	meta->gso_size = 0;
231 	meta->size = 0;
232 	meta->id = req->id;
233 
234 	npo->copy_off = 0;
235 	npo->copy_gref = req->gref;
236 
237 	return meta;
238 }
239 
240 struct xenvif_rx_cb {
241 	int meta_slots_used;
242 	bool full_coalesce;
243 };
244 
245 #define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
246 
247 /*
248  * Set up the grant operations for this fragment. If it's a flipping
249  * interface, we also set up the unmap request from here.
250  */
251 static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb,
252 				 struct netrx_pending_operations *npo,
253 				 struct page *page, unsigned long size,
254 				 unsigned long offset, int *head,
255 				 struct xenvif_queue *foreign_queue,
256 				 grant_ref_t foreign_gref)
257 {
258 	struct gnttab_copy *copy_gop;
259 	struct xenvif_rx_meta *meta;
260 	unsigned long bytes;
261 	int gso_type = XEN_NETIF_GSO_TYPE_NONE;
262 
263 	/* Data must not cross a page boundary. */
264 	BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));
265 
266 	meta = npo->meta + npo->meta_prod - 1;
267 
268 	/* Skip unused frames from start of page */
269 	page += offset >> PAGE_SHIFT;
270 	offset &= ~PAGE_MASK;
271 
272 	while (size > 0) {
273 		BUG_ON(offset >= PAGE_SIZE);
274 		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
275 
276 		bytes = PAGE_SIZE - offset;
277 
278 		if (bytes > size)
279 			bytes = size;
280 
281 		if (start_new_rx_buffer(npo->copy_off,
282 					bytes,
283 					*head,
284 					XENVIF_RX_CB(skb)->full_coalesce)) {
285 			/*
286 			 * Netfront requires there to be some data in the head
287 			 * buffer.
288 			 */
289 			BUG_ON(*head);
290 
291 			meta = get_next_rx_buffer(queue, npo);
292 		}
293 
294 		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
295 			bytes = MAX_BUFFER_OFFSET - npo->copy_off;
296 
297 		copy_gop = npo->copy + npo->copy_prod++;
298 		copy_gop->flags = GNTCOPY_dest_gref;
299 		copy_gop->len = bytes;
300 
301 		if (foreign_queue) {
302 			copy_gop->source.domid = foreign_queue->vif->domid;
303 			copy_gop->source.u.ref = foreign_gref;
304 			copy_gop->flags |= GNTCOPY_source_gref;
305 		} else {
306 			copy_gop->source.domid = DOMID_SELF;
307 			copy_gop->source.u.gmfn =
308 				virt_to_mfn(page_address(page));
309 		}
310 		copy_gop->source.offset = offset;
311 
312 		copy_gop->dest.domid = queue->vif->domid;
313 		copy_gop->dest.offset = npo->copy_off;
314 		copy_gop->dest.u.ref = npo->copy_gref;
315 
316 		npo->copy_off += bytes;
317 		meta->size += bytes;
318 
319 		offset += bytes;
320 		size -= bytes;
321 
322 		/* Next frame */
323 		if (offset == PAGE_SIZE && size) {
324 			BUG_ON(!PageCompound(page));
325 			page++;
326 			offset = 0;
327 		}
328 
329 		/* Leave a gap for the GSO descriptor. */
330 		if (skb_is_gso(skb)) {
331 			if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
332 				gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
333 			else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
334 				gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
335 		}
336 
337 		if (*head && ((1 << gso_type) & queue->vif->gso_mask))
338 			queue->rx.req_cons++;
339 
340 		*head = 0; /* There must be something in this buffer now. */
341 
342 	}
343 }
344 
345 /*
346  * Find the grant ref for a given frag in a chain of struct ubuf_info's
347  * skb: the skb itself
348  * i: the frag's number
349  * ubuf: a pointer to an element in the chain. It should not be NULL
350  *
 * Returns a pointer to the element in the chain where the page was found. If
352  * not found, returns NULL.
353  * See the definition of callback_struct in common.h for more details about
354  * the chain.
355  */
356 static const struct ubuf_info *xenvif_find_gref(const struct sk_buff *const skb,
357 						const int i,
358 						const struct ubuf_info *ubuf)
359 {
360 	struct xenvif_queue *foreign_queue = ubuf_to_queue(ubuf);
361 
362 	do {
363 		u16 pending_idx = ubuf->desc;
364 
365 		if (skb_shinfo(skb)->frags[i].page.p ==
366 		    foreign_queue->mmap_pages[pending_idx])
367 			break;
368 		ubuf = (struct ubuf_info *) ubuf->ctx;
369 	} while (ubuf);
370 
371 	return ubuf;
372 }
373 
374 /*
375  * Prepare an SKB to be transmitted to the frontend.
376  *
377  * This function is responsible for allocating grant operations, meta
378  * structures, etc.
379  *
380  * It returns the number of meta structures consumed. The number of
381  * ring slots used is always equal to the number of meta slots used
382  * plus the number of GSO descriptors used. Currently, we use either
383  * zero GSO descriptors (for non-GSO packets) or one descriptor (for
384  * frontend-side LRO).
385  */
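/* For example, an skb whose payload is laid out into N meta slots consumes
 * N + 1 ring slots when the frontend has negotiated a GSO extra_info slot
 * (the "one descriptor for frontend-side LRO" case above), and exactly N
 * ring slots otherwise.
 */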
386 static int xenvif_gop_skb(struct sk_buff *skb,
387 			  struct netrx_pending_operations *npo,
388 			  struct xenvif_queue *queue)
389 {
390 	struct xenvif *vif = netdev_priv(skb->dev);
391 	int nr_frags = skb_shinfo(skb)->nr_frags;
392 	int i;
393 	struct xen_netif_rx_request *req;
394 	struct xenvif_rx_meta *meta;
395 	unsigned char *data;
396 	int head = 1;
397 	int old_meta_prod;
398 	int gso_type;
399 	const struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg;
400 	const struct ubuf_info *const head_ubuf = ubuf;
401 
402 	old_meta_prod = npo->meta_prod;
403 
404 	gso_type = XEN_NETIF_GSO_TYPE_NONE;
405 	if (skb_is_gso(skb)) {
406 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
407 			gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
408 		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
409 			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
410 	}
411 
412 	/* Set up a GSO prefix descriptor, if necessary */
413 	if ((1 << gso_type) & vif->gso_prefix_mask) {
414 		req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
415 		meta = npo->meta + npo->meta_prod++;
416 		meta->gso_type = gso_type;
417 		meta->gso_size = skb_shinfo(skb)->gso_size;
418 		meta->size = 0;
419 		meta->id = req->id;
420 	}
421 
422 	req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
423 	meta = npo->meta + npo->meta_prod++;
424 
425 	if ((1 << gso_type) & vif->gso_mask) {
426 		meta->gso_type = gso_type;
427 		meta->gso_size = skb_shinfo(skb)->gso_size;
428 	} else {
429 		meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
430 		meta->gso_size = 0;
431 	}
432 
433 	meta->size = 0;
434 	meta->id = req->id;
435 	npo->copy_off = 0;
436 	npo->copy_gref = req->gref;
437 
438 	data = skb->data;
439 	while (data < skb_tail_pointer(skb)) {
440 		unsigned int offset = offset_in_page(data);
441 		unsigned int len = PAGE_SIZE - offset;
442 
443 		if (data + len > skb_tail_pointer(skb))
444 			len = skb_tail_pointer(skb) - data;
445 
446 		xenvif_gop_frag_copy(queue, skb, npo,
447 				     virt_to_page(data), len, offset, &head,
448 				     NULL,
449 				     0);
450 		data += len;
451 	}
452 
453 	for (i = 0; i < nr_frags; i++) {
454 		/* This variable also signals whether foreign_gref has a real
455 		 * value or not.
456 		 */
457 		struct xenvif_queue *foreign_queue = NULL;
458 		grant_ref_t foreign_gref;
459 
460 		if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) &&
461 			(ubuf->callback == &xenvif_zerocopy_callback)) {
462 			const struct ubuf_info *const startpoint = ubuf;
463 
			/* Ideally ubuf points to the chain element which
			 * belongs to this frag. Or, if frags were removed from
			 * the beginning, to an element shortly before it.
			 */
468 			ubuf = xenvif_find_gref(skb, i, ubuf);
469 
470 			/* Try again from the beginning of the list, if we
471 			 * haven't tried from there. This only makes sense in
472 			 * the unlikely event of reordering the original frags.
473 			 * For injected local pages it's an unnecessary second
474 			 * run.
475 			 */
476 			if (unlikely(!ubuf) && startpoint != head_ubuf)
477 				ubuf = xenvif_find_gref(skb, i, head_ubuf);
478 
479 			if (likely(ubuf)) {
480 				u16 pending_idx = ubuf->desc;
481 
482 				foreign_queue = ubuf_to_queue(ubuf);
483 				foreign_gref =
484 					foreign_queue->pending_tx_info[pending_idx].req.gref;
				/* Just a safety measure. If this was the last
				 * element on the list, the for loop will
				 * iterate again if a local page was added to
				 * the end. Using head_ubuf here prevents the
				 * second search on the chain. Or the original
				 * frags changed order, but that's less likely.
				 * In any case, ubuf shouldn't be NULL.
				 */
493 				ubuf = ubuf->ctx ?
494 					(struct ubuf_info *) ubuf->ctx :
495 					head_ubuf;
496 			} else
497 				/* This frag was a local page, added to the
498 				 * array after the skb left netback.
499 				 */
500 				ubuf = head_ubuf;
501 		}
502 		xenvif_gop_frag_copy(queue, skb, npo,
503 				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
504 				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
505 				     skb_shinfo(skb)->frags[i].page_offset,
506 				     &head,
507 				     foreign_queue,
508 				     foreign_queue ? foreign_gref : UINT_MAX);
509 	}
510 
511 	return npo->meta_prod - old_meta_prod;
512 }
513 
514 /*
515  * This is a twin to xenvif_gop_skb.  Assume that xenvif_gop_skb was
516  * used to set up the operations on the top of
517  * netrx_pending_operations, which have since been done.  Check that
518  * they didn't give any errors and advance over them.
519  */
520 static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
521 			    struct netrx_pending_operations *npo)
522 {
523 	struct gnttab_copy     *copy_op;
524 	int status = XEN_NETIF_RSP_OKAY;
525 	int i;
526 
527 	for (i = 0; i < nr_meta_slots; i++) {
528 		copy_op = npo->copy + npo->copy_cons++;
529 		if (copy_op->status != GNTST_okay) {
530 			netdev_dbg(vif->dev,
531 				   "Bad status %d from copy to DOM%d.\n",
532 				   copy_op->status, vif->domid);
533 			status = XEN_NETIF_RSP_ERROR;
534 		}
535 	}
536 
537 	return status;
538 }
539 
540 static void xenvif_add_frag_responses(struct xenvif_queue *queue, int status,
541 				      struct xenvif_rx_meta *meta,
542 				      int nr_meta_slots)
543 {
544 	int i;
545 	unsigned long offset;
546 
547 	/* No fragments used */
548 	if (nr_meta_slots <= 1)
549 		return;
550 
551 	nr_meta_slots--;
552 
553 	for (i = 0; i < nr_meta_slots; i++) {
554 		int flags;
555 		if (i == nr_meta_slots - 1)
556 			flags = 0;
557 		else
558 			flags = XEN_NETRXF_more_data;
559 
560 		offset = 0;
561 		make_rx_response(queue, meta[i].id, status, offset,
562 				 meta[i].size, flags);
563 	}
564 }
565 
566 void xenvif_kick_thread(struct xenvif_queue *queue)
567 {
568 	wake_up(&queue->wq);
569 }
570 
571 static void xenvif_rx_action(struct xenvif_queue *queue)
572 {
573 	s8 status;
574 	u16 flags;
575 	struct xen_netif_rx_response *resp;
576 	struct sk_buff_head rxq;
577 	struct sk_buff *skb;
578 	LIST_HEAD(notify);
579 	int ret;
580 	unsigned long offset;
581 	bool need_to_notify = false;
582 
583 	struct netrx_pending_operations npo = {
584 		.copy  = queue->grant_copy_op,
585 		.meta  = queue->meta,
586 	};
587 
588 	skb_queue_head_init(&rxq);
589 
590 	while ((skb = skb_dequeue(&queue->rx_queue)) != NULL) {
591 		RING_IDX max_slots_needed;
592 		RING_IDX old_req_cons;
593 		RING_IDX ring_slots_used;
594 		int i;
595 
		/* We need a cheap worst-case estimate for the number of
		 * slots we'll use.
		 */
599 
600 		max_slots_needed = DIV_ROUND_UP(offset_in_page(skb->data) +
601 						skb_headlen(skb),
602 						PAGE_SIZE);
603 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
604 			unsigned int size;
605 			unsigned int offset;
606 
607 			size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
608 			offset = skb_shinfo(skb)->frags[i].page_offset;
609 
			/* For a worst-case estimate we need to factor in
611 			 * the fragment page offset as this will affect the
612 			 * number of times xenvif_gop_frag_copy() will
613 			 * call start_new_rx_buffer().
614 			 */
615 			max_slots_needed += DIV_ROUND_UP(offset + size,
616 							 PAGE_SIZE);
617 		}
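		/* A worked example of the estimate: 200 bytes of linear data
		 * starting 4000 bytes into a 4096-byte page span two pages
		 * and so cost 2 slots; a 6000-byte frag at page offset 100
		 * adds DIV_ROUND_UP(6100, 4096) == 2 more slots.
		 */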
618 
619 		/* To avoid the estimate becoming too pessimal for some
620 		 * frontends that limit posted rx requests, cap the estimate
621 		 * at MAX_SKB_FRAGS. In this case netback will fully coalesce
622 		 * the skb into the provided slots.
623 		 */
624 		if (max_slots_needed > MAX_SKB_FRAGS) {
625 			max_slots_needed = MAX_SKB_FRAGS;
626 			XENVIF_RX_CB(skb)->full_coalesce = true;
627 		} else {
628 			XENVIF_RX_CB(skb)->full_coalesce = false;
629 		}
630 
631 		/* We may need one more slot for GSO metadata */
632 		if (skb_is_gso(skb) &&
633 		   (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
634 		    skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
635 			max_slots_needed++;
636 
637 		/* If the skb may not fit then bail out now */
638 		if (!xenvif_rx_ring_slots_available(queue, max_slots_needed)) {
639 			skb_queue_head(&queue->rx_queue, skb);
640 			need_to_notify = true;
641 			queue->rx_last_skb_slots = max_slots_needed;
642 			break;
643 		} else
644 			queue->rx_last_skb_slots = 0;
645 
646 		old_req_cons = queue->rx.req_cons;
647 		XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue);
648 		ring_slots_used = queue->rx.req_cons - old_req_cons;
649 
650 		BUG_ON(ring_slots_used > max_slots_needed);
651 
652 		__skb_queue_tail(&rxq, skb);
653 	}
654 
655 	BUG_ON(npo.meta_prod > ARRAY_SIZE(queue->meta));
656 
657 	if (!npo.copy_prod)
658 		goto done;
659 
660 	BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
661 	gnttab_batch_copy(queue->grant_copy_op, npo.copy_prod);
662 
663 	while ((skb = __skb_dequeue(&rxq)) != NULL) {
664 
665 		if ((1 << queue->meta[npo.meta_cons].gso_type) &
666 		    queue->vif->gso_prefix_mask) {
667 			resp = RING_GET_RESPONSE(&queue->rx,
668 						 queue->rx.rsp_prod_pvt++);
669 
670 			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
671 
672 			resp->offset = queue->meta[npo.meta_cons].gso_size;
673 			resp->id = queue->meta[npo.meta_cons].id;
674 			resp->status = XENVIF_RX_CB(skb)->meta_slots_used;
675 
676 			npo.meta_cons++;
677 			XENVIF_RX_CB(skb)->meta_slots_used--;
678 		}
679 
680 
681 		queue->stats.tx_bytes += skb->len;
682 		queue->stats.tx_packets++;
683 
684 		status = xenvif_check_gop(queue->vif,
685 					  XENVIF_RX_CB(skb)->meta_slots_used,
686 					  &npo);
687 
688 		if (XENVIF_RX_CB(skb)->meta_slots_used == 1)
689 			flags = 0;
690 		else
691 			flags = XEN_NETRXF_more_data;
692 
693 		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
694 			flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
695 		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
696 			/* remote but checksummed. */
697 			flags |= XEN_NETRXF_data_validated;
698 
699 		offset = 0;
700 		resp = make_rx_response(queue, queue->meta[npo.meta_cons].id,
701 					status, offset,
702 					queue->meta[npo.meta_cons].size,
703 					flags);
704 
705 		if ((1 << queue->meta[npo.meta_cons].gso_type) &
706 		    queue->vif->gso_mask) {
707 			struct xen_netif_extra_info *gso =
708 				(struct xen_netif_extra_info *)
709 				RING_GET_RESPONSE(&queue->rx,
710 						  queue->rx.rsp_prod_pvt++);
711 
712 			resp->flags |= XEN_NETRXF_extra_info;
713 
714 			gso->u.gso.type = queue->meta[npo.meta_cons].gso_type;
715 			gso->u.gso.size = queue->meta[npo.meta_cons].gso_size;
716 			gso->u.gso.pad = 0;
717 			gso->u.gso.features = 0;
718 
719 			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
720 			gso->flags = 0;
721 		}
722 
723 		xenvif_add_frag_responses(queue, status,
724 					  queue->meta + npo.meta_cons + 1,
725 					  XENVIF_RX_CB(skb)->meta_slots_used);
726 
727 		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, ret);
728 
729 		need_to_notify |= !!ret;
730 
731 		npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used;
732 		dev_kfree_skb(skb);
733 	}
734 
735 done:
736 	if (need_to_notify)
737 		notify_remote_via_irq(queue->rx_irq);
738 }
739 
740 void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
741 {
742 	int more_to_do;
743 
744 	RING_FINAL_CHECK_FOR_REQUESTS(&queue->tx, more_to_do);
745 
746 	if (more_to_do)
747 		napi_schedule(&queue->napi);
748 }
749 
750 static void tx_add_credit(struct xenvif_queue *queue)
751 {
752 	unsigned long max_burst, max_credit;
753 
754 	/*
755 	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
756 	 * Otherwise the interface can seize up due to insufficient credit.
757 	 */
758 	max_burst = RING_GET_REQUEST(&queue->tx, queue->tx.req_cons)->size;
759 	max_burst = min(max_burst, 131072UL);
760 	max_burst = max(max_burst, queue->credit_bytes);
761 
762 	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
763 	max_credit = queue->remaining_credit + queue->credit_bytes;
764 	if (max_credit < queue->remaining_credit)
765 		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
766 
767 	queue->remaining_credit = min(max_credit, max_burst);
768 }
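/* A worked example of the clamping above: with credit_bytes == 100000,
 * remaining_credit == 20000 and a next pending request of 60000 bytes,
 * max_burst becomes max(min(60000, 131072), 100000) == 100000 and
 * max_credit becomes 120000, so remaining_credit is refilled to
 * min(120000, 100000) == 100000.
 */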
769 
770 static void tx_credit_callback(unsigned long data)
771 {
772 	struct xenvif_queue *queue = (struct xenvif_queue *)data;
773 	tx_add_credit(queue);
774 	xenvif_napi_schedule_or_enable_events(queue);
775 }
776 
777 static void xenvif_tx_err(struct xenvif_queue *queue,
778 			  struct xen_netif_tx_request *txp, RING_IDX end)
779 {
780 	RING_IDX cons = queue->tx.req_cons;
781 	unsigned long flags;
782 
783 	do {
784 		spin_lock_irqsave(&queue->response_lock, flags);
785 		make_tx_response(queue, txp, XEN_NETIF_RSP_ERROR);
786 		spin_unlock_irqrestore(&queue->response_lock, flags);
787 		if (cons == end)
788 			break;
789 		txp = RING_GET_REQUEST(&queue->tx, cons++);
790 	} while (1);
791 	queue->tx.req_cons = cons;
792 }
793 
794 static void xenvif_fatal_tx_err(struct xenvif *vif)
795 {
796 	netdev_err(vif->dev, "fatal error; disabling device\n");
797 	vif->disabled = true;
798 	/* Disable the vif from queue 0's kthread */
799 	if (vif->queues)
800 		xenvif_kick_thread(&vif->queues[0]);
801 }
802 
803 static int xenvif_count_requests(struct xenvif_queue *queue,
804 				 struct xen_netif_tx_request *first,
805 				 struct xen_netif_tx_request *txp,
806 				 int work_to_do)
807 {
808 	RING_IDX cons = queue->tx.req_cons;
809 	int slots = 0;
810 	int drop_err = 0;
811 	int more_data;
812 
813 	if (!(first->flags & XEN_NETTXF_more_data))
814 		return 0;
815 
816 	do {
817 		struct xen_netif_tx_request dropped_tx = { 0 };
818 
819 		if (slots >= work_to_do) {
820 			netdev_err(queue->vif->dev,
821 				   "Asked for %d slots but exceeds this limit\n",
822 				   work_to_do);
823 			xenvif_fatal_tx_err(queue->vif);
824 			return -ENODATA;
825 		}
826 
		/* This guest is really using too many slots and is
		 * considered malicious.
		 */
830 		if (unlikely(slots >= fatal_skb_slots)) {
831 			netdev_err(queue->vif->dev,
832 				   "Malicious frontend using %d slots, threshold %u\n",
833 				   slots, fatal_skb_slots);
834 			xenvif_fatal_tx_err(queue->vif);
835 			return -E2BIG;
836 		}
837 
		/* The Xen network protocol had an implicit dependency on
		 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
		 * the historical MAX_SKB_FRAGS value 18 to honor the
		 * same behavior as before. Any packet using more than
		 * 18 slots but fewer than fatal_skb_slots slots is
		 * dropped.
		 */
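		/* Concretely, with the default fatal_skb_slots of 20: once the
		 * follow-on slot count reaches XEN_NETBK_LEGACY_SLOTS_MAX (18)
		 * the packet is consumed and dropped below, and once it
		 * reaches fatal_skb_slots the frontend is treated as
		 * malicious above.
		 */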
845 		if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
846 			if (net_ratelimit())
847 				netdev_dbg(queue->vif->dev,
848 					   "Too many slots (%d) exceeding limit (%d), dropping packet\n",
849 					   slots, XEN_NETBK_LEGACY_SLOTS_MAX);
850 			drop_err = -E2BIG;
851 		}
852 
853 		if (drop_err)
854 			txp = &dropped_tx;
855 
856 		memcpy(txp, RING_GET_REQUEST(&queue->tx, cons + slots),
857 		       sizeof(*txp));
858 
859 		/* If the guest submitted a frame >= 64 KiB then
860 		 * first->size overflowed and following slots will
861 		 * appear to be larger than the frame.
862 		 *
		 * This cannot be a fatal error as there are buggy
864 		 * frontends that do this.
865 		 *
866 		 * Consume all slots and drop the packet.
867 		 */
868 		if (!drop_err && txp->size > first->size) {
869 			if (net_ratelimit())
870 				netdev_dbg(queue->vif->dev,
871 					   "Invalid tx request, slot size %u > remaining size %u\n",
872 					   txp->size, first->size);
873 			drop_err = -EIO;
874 		}
875 
876 		first->size -= txp->size;
877 		slots++;
878 
879 		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
880 			netdev_err(queue->vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
881 				 txp->offset, txp->size);
882 			xenvif_fatal_tx_err(queue->vif);
883 			return -EINVAL;
884 		}
885 
886 		more_data = txp->flags & XEN_NETTXF_more_data;
887 
888 		if (!drop_err)
889 			txp++;
890 
891 	} while (more_data);
892 
893 	if (drop_err) {
894 		xenvif_tx_err(queue, first, cons + slots);
895 		return drop_err;
896 	}
897 
898 	return slots;
899 }
900 
901 
902 struct xenvif_tx_cb {
903 	u16 pending_idx;
904 };
905 
906 #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
907 
908 static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
909 					  u16 pending_idx,
910 					  struct xen_netif_tx_request *txp,
911 					  struct gnttab_map_grant_ref *mop)
912 {
913 	queue->pages_to_map[mop-queue->tx_map_ops] = queue->mmap_pages[pending_idx];
914 	gnttab_set_map_op(mop, idx_to_kaddr(queue, pending_idx),
915 			  GNTMAP_host_map | GNTMAP_readonly,
916 			  txp->gref, queue->vif->domid);
917 
918 	memcpy(&queue->pending_tx_info[pending_idx].req, txp,
919 	       sizeof(*txp));
920 }
921 
922 static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
923 {
924 	struct sk_buff *skb =
925 		alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN,
926 			  GFP_ATOMIC | __GFP_NOWARN);
927 	if (unlikely(skb == NULL))
928 		return NULL;
929 
930 	/* Packets passed to netif_rx() must have some headroom. */
931 	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
932 
933 	/* Initialize it here to avoid later surprises */
934 	skb_shinfo(skb)->destructor_arg = NULL;
935 
936 	return skb;
937 }
938 
939 static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue,
940 							struct sk_buff *skb,
941 							struct xen_netif_tx_request *txp,
942 							struct gnttab_map_grant_ref *gop)
943 {
944 	struct skb_shared_info *shinfo = skb_shinfo(skb);
945 	skb_frag_t *frags = shinfo->frags;
946 	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
947 	int start;
948 	pending_ring_idx_t index;
949 	unsigned int nr_slots, frag_overflow = 0;
950 
951 	/* At this point shinfo->nr_frags is in fact the number of
952 	 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
953 	 */
954 	if (shinfo->nr_frags > MAX_SKB_FRAGS) {
955 		frag_overflow = shinfo->nr_frags - MAX_SKB_FRAGS;
956 		BUG_ON(frag_overflow > MAX_SKB_FRAGS);
957 		shinfo->nr_frags = MAX_SKB_FRAGS;
958 	}
959 	nr_slots = shinfo->nr_frags;
960 
961 	/* Skip first skb fragment if it is on same page as header fragment. */
962 	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
963 
964 	for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
965 	     shinfo->nr_frags++, txp++, gop++) {
966 		index = pending_index(queue->pending_cons++);
967 		pending_idx = queue->pending_ring[index];
968 		xenvif_tx_create_map_op(queue, pending_idx, txp, gop);
969 		frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
970 	}
971 
972 	if (frag_overflow) {
973 		struct sk_buff *nskb = xenvif_alloc_skb(0);
974 		if (unlikely(nskb == NULL)) {
975 			if (net_ratelimit())
976 				netdev_err(queue->vif->dev,
977 					   "Can't allocate the frag_list skb.\n");
978 			return NULL;
979 		}
980 
981 		shinfo = skb_shinfo(nskb);
982 		frags = shinfo->frags;
983 
984 		for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
985 		     shinfo->nr_frags++, txp++, gop++) {
986 			index = pending_index(queue->pending_cons++);
987 			pending_idx = queue->pending_ring[index];
988 			xenvif_tx_create_map_op(queue, pending_idx, txp, gop);
989 			frag_set_pending_idx(&frags[shinfo->nr_frags],
990 					     pending_idx);
991 		}
992 
993 		skb_shinfo(skb)->frag_list = nskb;
994 	}
995 
996 	return gop;
997 }
998 
999 static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
1000 					   u16 pending_idx,
1001 					   grant_handle_t handle)
1002 {
1003 	if (unlikely(queue->grant_tx_handle[pending_idx] !=
1004 		     NETBACK_INVALID_HANDLE)) {
1005 		netdev_err(queue->vif->dev,
1006 			   "Trying to overwrite active handle! pending_idx: %x\n",
1007 			   pending_idx);
1008 		BUG();
1009 	}
1010 	queue->grant_tx_handle[pending_idx] = handle;
1011 }
1012 
1013 static inline void xenvif_grant_handle_reset(struct xenvif_queue *queue,
1014 					     u16 pending_idx)
1015 {
1016 	if (unlikely(queue->grant_tx_handle[pending_idx] ==
1017 		     NETBACK_INVALID_HANDLE)) {
1018 		netdev_err(queue->vif->dev,
1019 			   "Trying to unmap invalid handle! pending_idx: %x\n",
1020 			   pending_idx);
1021 		BUG();
1022 	}
1023 	queue->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
1024 }
1025 
1026 static int xenvif_tx_check_gop(struct xenvif_queue *queue,
1027 			       struct sk_buff *skb,
1028 			       struct gnttab_map_grant_ref **gopp_map,
1029 			       struct gnttab_copy **gopp_copy)
1030 {
1031 	struct gnttab_map_grant_ref *gop_map = *gopp_map;
1032 	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
1033 	struct skb_shared_info *shinfo = skb_shinfo(skb);
1034 	int nr_frags = shinfo->nr_frags;
1035 	int i, err;
1036 	struct sk_buff *first_skb = NULL;
1037 
1038 	/* Check status of header. */
1039 	err = (*gopp_copy)->status;
1040 	(*gopp_copy)++;
1041 	if (unlikely(err)) {
1042 		if (net_ratelimit())
1043 			netdev_dbg(queue->vif->dev,
1044 				   "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
1045 				   (*gopp_copy)->status,
1046 				   pending_idx,
1047 				   (*gopp_copy)->source.u.ref);
1048 		xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
1049 	}
1050 
1051 check_frags:
1052 	for (i = 0; i < nr_frags; i++, gop_map++) {
1053 		int j, newerr;
1054 
1055 		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
1056 
1057 		/* Check error status: if okay then remember grant handle. */
1058 		newerr = gop_map->status;
1059 
1060 		if (likely(!newerr)) {
1061 			xenvif_grant_handle_set(queue,
1062 						pending_idx,
1063 						gop_map->handle);
1064 			/* Had a previous error? Invalidate this fragment. */
1065 			if (unlikely(err))
1066 				xenvif_idx_unmap(queue, pending_idx);
1067 			continue;
1068 		}
1069 
1070 		/* Error on this fragment: respond to client with an error. */
1071 		if (net_ratelimit())
1072 			netdev_dbg(queue->vif->dev,
1073 				   "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
1074 				   i,
1075 				   gop_map->status,
1076 				   pending_idx,
1077 				   gop_map->ref);
1078 		xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
1079 
1080 		/* Not the first error? Preceding frags already invalidated. */
1081 		if (err)
1082 			continue;
1083 		/* First error: invalidate preceding fragments. */
1084 		for (j = 0; j < i; j++) {
1085 			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
1086 			xenvif_idx_unmap(queue, pending_idx);
1087 		}
1088 
1089 		/* Remember the error: invalidate all subsequent fragments. */
1090 		err = newerr;
1091 	}
1092 
1093 	if (skb_has_frag_list(skb)) {
1094 		first_skb = skb;
1095 		skb = shinfo->frag_list;
1096 		shinfo = skb_shinfo(skb);
1097 		nr_frags = shinfo->nr_frags;
1098 
1099 		goto check_frags;
1100 	}
1101 
	/* If there was a mapping error in the frag_list skb, we have to unmap
	 * the first skb's frags too.
	 */
1105 	if (first_skb && err) {
1106 		int j;
1107 		shinfo = skb_shinfo(first_skb);
1108 		for (j = 0; j < shinfo->nr_frags; j++) {
1109 			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
1110 			xenvif_idx_unmap(queue, pending_idx);
1111 		}
1112 	}
1113 
1114 	*gopp_map = gop_map;
1115 	return err;
1116 }
1117 
1118 static void xenvif_fill_frags(struct xenvif_queue *queue, struct sk_buff *skb)
1119 {
1120 	struct skb_shared_info *shinfo = skb_shinfo(skb);
1121 	int nr_frags = shinfo->nr_frags;
1122 	int i;
1123 	u16 prev_pending_idx = INVALID_PENDING_IDX;
1124 
1125 	for (i = 0; i < nr_frags; i++) {
1126 		skb_frag_t *frag = shinfo->frags + i;
1127 		struct xen_netif_tx_request *txp;
1128 		struct page *page;
1129 		u16 pending_idx;
1130 
1131 		pending_idx = frag_get_pending_idx(frag);
1132 
		/* If this is not the first frag, chain it to the previous */
1134 		if (prev_pending_idx == INVALID_PENDING_IDX)
1135 			skb_shinfo(skb)->destructor_arg =
1136 				&callback_param(queue, pending_idx);
1137 		else
1138 			callback_param(queue, prev_pending_idx).ctx =
1139 				&callback_param(queue, pending_idx);
1140 
1141 		callback_param(queue, pending_idx).ctx = NULL;
1142 		prev_pending_idx = pending_idx;
1143 
1144 		txp = &queue->pending_tx_info[pending_idx].req;
1145 		page = virt_to_page(idx_to_kaddr(queue, pending_idx));
1146 		__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
1147 		skb->len += txp->size;
1148 		skb->data_len += txp->size;
1149 		skb->truesize += txp->size;
1150 
1151 		/* Take an extra reference to offset network stack's put_page */
1152 		get_page(queue->mmap_pages[pending_idx]);
1153 	}
1154 	/* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc
1155 	 * overlaps with "index", and "mapping" is not set. I think mapping
1156 	 * should be set. If delivered to local stack, it would drop this
1157 	 * skb in sk_filter unless the socket has the right to use it.
1158 	 */
1159 	skb->pfmemalloc	= false;
1160 }
1161 
1162 static int xenvif_get_extras(struct xenvif_queue *queue,
1163 				struct xen_netif_extra_info *extras,
1164 				int work_to_do)
1165 {
1166 	struct xen_netif_extra_info extra;
1167 	RING_IDX cons = queue->tx.req_cons;
1168 
1169 	do {
1170 		if (unlikely(work_to_do-- <= 0)) {
1171 			netdev_err(queue->vif->dev, "Missing extra info\n");
1172 			xenvif_fatal_tx_err(queue->vif);
1173 			return -EBADR;
1174 		}
1175 
1176 		memcpy(&extra, RING_GET_REQUEST(&queue->tx, cons),
1177 		       sizeof(extra));
1178 		if (unlikely(!extra.type ||
1179 			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1180 			queue->tx.req_cons = ++cons;
1181 			netdev_err(queue->vif->dev,
1182 				   "Invalid extra type: %d\n", extra.type);
1183 			xenvif_fatal_tx_err(queue->vif);
1184 			return -EINVAL;
1185 		}
1186 
1187 		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
1188 		queue->tx.req_cons = ++cons;
1189 	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
1190 
1191 	return work_to_do;
1192 }
1193 
1194 static int xenvif_set_skb_gso(struct xenvif *vif,
1195 			      struct sk_buff *skb,
1196 			      struct xen_netif_extra_info *gso)
1197 {
1198 	if (!gso->u.gso.size) {
1199 		netdev_err(vif->dev, "GSO size must not be zero.\n");
1200 		xenvif_fatal_tx_err(vif);
1201 		return -EINVAL;
1202 	}
1203 
1204 	switch (gso->u.gso.type) {
1205 	case XEN_NETIF_GSO_TYPE_TCPV4:
1206 		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1207 		break;
1208 	case XEN_NETIF_GSO_TYPE_TCPV6:
1209 		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
1210 		break;
1211 	default:
1212 		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
1213 		xenvif_fatal_tx_err(vif);
1214 		return -EINVAL;
1215 	}
1216 
1217 	skb_shinfo(skb)->gso_size = gso->u.gso.size;
1218 	/* gso_segs will be calculated later */
1219 
1220 	return 0;
1221 }
1222 
1223 static int checksum_setup(struct xenvif_queue *queue, struct sk_buff *skb)
1224 {
1225 	bool recalculate_partial_csum = false;
1226 
1227 	/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
1228 	 * peers can fail to set NETRXF_csum_blank when sending a GSO
1229 	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
1230 	 * recalculate the partial checksum.
1231 	 */
1232 	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
1233 		queue->stats.rx_gso_checksum_fixup++;
1234 		skb->ip_summed = CHECKSUM_PARTIAL;
1235 		recalculate_partial_csum = true;
1236 	}
1237 
1238 	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
1239 	if (skb->ip_summed != CHECKSUM_PARTIAL)
1240 		return 0;
1241 
1242 	return skb_checksum_setup(skb, recalculate_partial_csum);
1243 }
1244 
1245 static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
1246 {
1247 	u64 now = get_jiffies_64();
1248 	u64 next_credit = queue->credit_window_start +
1249 		msecs_to_jiffies(queue->credit_usec / 1000);
1250 
1251 	/* Timer could already be pending in rare cases. */
1252 	if (timer_pending(&queue->credit_timeout))
1253 		return true;
1254 
1255 	/* Passed the point where we can replenish credit? */
1256 	if (time_after_eq64(now, next_credit)) {
1257 		queue->credit_window_start = now;
1258 		tx_add_credit(queue);
1259 	}
1260 
1261 	/* Still too big to send right now? Set a callback. */
1262 	if (size > queue->remaining_credit) {
1263 		queue->credit_timeout.data     =
1264 			(unsigned long)queue;
1265 		queue->credit_timeout.function =
1266 			tx_credit_callback;
1267 		mod_timer(&queue->credit_timeout,
1268 			  next_credit);
1269 		queue->credit_window_start = next_credit;
1270 
1271 		return true;
1272 	}
1273 
1274 	return false;
1275 }
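/* A worked example of the credit scheduling above: with credit_usec ==
 * 50000 the replenish window is 50 ms. Once that point has passed, credit
 * is topped up on the spot and the packet goes out if it now fits;
 * otherwise, if the packet is still larger than remaining_credit, the
 * credit timer is armed and the caller backs off until tx_credit_callback()
 * adds credit and re-schedules NAPI.
 */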
1276 
1277 static void xenvif_tx_build_gops(struct xenvif_queue *queue,
1278 				     int budget,
1279 				     unsigned *copy_ops,
1280 				     unsigned *map_ops)
1281 {
1282 	struct gnttab_map_grant_ref *gop = queue->tx_map_ops, *request_gop;
1283 	struct sk_buff *skb;
1284 	int ret;
1285 
1286 	while (skb_queue_len(&queue->tx_queue) < budget) {
1287 		struct xen_netif_tx_request txreq;
1288 		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
1289 		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
1290 		u16 pending_idx;
1291 		RING_IDX idx;
1292 		int work_to_do;
1293 		unsigned int data_len;
1294 		pending_ring_idx_t index;
1295 
1296 		if (queue->tx.sring->req_prod - queue->tx.req_cons >
1297 		    XEN_NETIF_TX_RING_SIZE) {
1298 			netdev_err(queue->vif->dev,
1299 				   "Impossible number of requests. "
1300 				   "req_prod %d, req_cons %d, size %ld\n",
1301 				   queue->tx.sring->req_prod, queue->tx.req_cons,
1302 				   XEN_NETIF_TX_RING_SIZE);
1303 			xenvif_fatal_tx_err(queue->vif);
1304 			break;
1305 		}
1306 
1307 		work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx);
1308 		if (!work_to_do)
1309 			break;
1310 
1311 		idx = queue->tx.req_cons;
1312 		rmb(); /* Ensure that we see the request before we copy it. */
1313 		memcpy(&txreq, RING_GET_REQUEST(&queue->tx, idx), sizeof(txreq));
1314 
1315 		/* Credit-based scheduling. */
1316 		if (txreq.size > queue->remaining_credit &&
1317 		    tx_credit_exceeded(queue, txreq.size))
1318 			break;
1319 
1320 		queue->remaining_credit -= txreq.size;
1321 
1322 		work_to_do--;
1323 		queue->tx.req_cons = ++idx;
1324 
1325 		memset(extras, 0, sizeof(extras));
1326 		if (txreq.flags & XEN_NETTXF_extra_info) {
1327 			work_to_do = xenvif_get_extras(queue, extras,
1328 						       work_to_do);
1329 			idx = queue->tx.req_cons;
1330 			if (unlikely(work_to_do < 0))
1331 				break;
1332 		}
1333 
1334 		ret = xenvif_count_requests(queue, &txreq, txfrags, work_to_do);
1335 		if (unlikely(ret < 0))
1336 			break;
1337 
1338 		idx += ret;
1339 
1340 		if (unlikely(txreq.size < ETH_HLEN)) {
1341 			netdev_dbg(queue->vif->dev,
1342 				   "Bad packet size: %d\n", txreq.size);
1343 			xenvif_tx_err(queue, &txreq, idx);
1344 			break;
1345 		}
1346 
		/* No crossing a page boundary, as the payload mustn't fragment. */
1348 		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
1349 			netdev_err(queue->vif->dev,
1350 				   "txreq.offset: %x, size: %u, end: %lu\n",
1351 				   txreq.offset, txreq.size,
1352 				   (txreq.offset&~PAGE_MASK) + txreq.size);
1353 			xenvif_fatal_tx_err(queue->vif);
1354 			break;
1355 		}
1356 
1357 		index = pending_index(queue->pending_cons);
1358 		pending_idx = queue->pending_ring[index];
1359 
1360 		data_len = (txreq.size > PKT_PROT_LEN &&
1361 			    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
1362 			PKT_PROT_LEN : txreq.size;
1363 
1364 		skb = xenvif_alloc_skb(data_len);
1365 		if (unlikely(skb == NULL)) {
1366 			netdev_dbg(queue->vif->dev,
1367 				   "Can't allocate a skb in start_xmit.\n");
1368 			xenvif_tx_err(queue, &txreq, idx);
1369 			break;
1370 		}
1371 
1372 		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1373 			struct xen_netif_extra_info *gso;
1374 			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1375 
1376 			if (xenvif_set_skb_gso(queue->vif, skb, gso)) {
1377 				/* Failure in xenvif_set_skb_gso is fatal. */
1378 				kfree_skb(skb);
1379 				break;
1380 			}
1381 		}
1382 
1383 		XENVIF_TX_CB(skb)->pending_idx = pending_idx;
1384 
1385 		__skb_put(skb, data_len);
1386 		queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
1387 		queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
1388 		queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;
1389 
1390 		queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
1391 			virt_to_mfn(skb->data);
1392 		queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
1393 		queue->tx_copy_ops[*copy_ops].dest.offset =
1394 			offset_in_page(skb->data);
1395 
1396 		queue->tx_copy_ops[*copy_ops].len = data_len;
1397 		queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
1398 
1399 		(*copy_ops)++;
1400 
1401 		skb_shinfo(skb)->nr_frags = ret;
1402 		if (data_len < txreq.size) {
1403 			skb_shinfo(skb)->nr_frags++;
1404 			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
1405 					     pending_idx);
1406 			xenvif_tx_create_map_op(queue, pending_idx, &txreq, gop);
1407 			gop++;
1408 		} else {
1409 			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
1410 					     INVALID_PENDING_IDX);
1411 			memcpy(&queue->pending_tx_info[pending_idx].req, &txreq,
1412 			       sizeof(txreq));
1413 		}
1414 
1415 		queue->pending_cons++;
1416 
1417 		request_gop = xenvif_get_requests(queue, skb, txfrags, gop);
1418 		if (request_gop == NULL) {
1419 			kfree_skb(skb);
1420 			xenvif_tx_err(queue, &txreq, idx);
1421 			break;
1422 		}
1423 		gop = request_gop;
1424 
1425 		__skb_queue_tail(&queue->tx_queue, skb);
1426 
1427 		queue->tx.req_cons = idx;
1428 
1429 		if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
1430 		    (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
1431 			break;
1432 	}
1433 
1434 	(*map_ops) = gop - queue->tx_map_ops;
1435 	return;
1436 }
1437 
/* Consolidate an skb with a frag_list into a brand new one with local pages
 * in its frags. Returns 0, or -ENOMEM if new pages can't be allocated.
 */
1441 static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *skb)
1442 {
1443 	unsigned int offset = skb_headlen(skb);
1444 	skb_frag_t frags[MAX_SKB_FRAGS];
1445 	int i;
1446 	struct ubuf_info *uarg;
1447 	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
1448 
1449 	queue->stats.tx_zerocopy_sent += 2;
1450 	queue->stats.tx_frag_overflow++;
1451 
1452 	xenvif_fill_frags(queue, nskb);
1453 	/* Subtract frags size, we will correct it later */
1454 	skb->truesize -= skb->data_len;
1455 	skb->len += nskb->len;
1456 	skb->data_len += nskb->len;
1457 
1458 	/* create a brand new frags array and coalesce there */
1459 	for (i = 0; offset < skb->len; i++) {
1460 		struct page *page;
1461 		unsigned int len;
1462 
1463 		BUG_ON(i >= MAX_SKB_FRAGS);
1464 		page = alloc_page(GFP_ATOMIC|__GFP_COLD);
1465 		if (!page) {
1466 			int j;
1467 			skb->truesize += skb->data_len;
1468 			for (j = 0; j < i; j++)
1469 				put_page(frags[j].page.p);
1470 			return -ENOMEM;
1471 		}
1472 
1473 		if (offset + PAGE_SIZE < skb->len)
1474 			len = PAGE_SIZE;
1475 		else
1476 			len = skb->len - offset;
1477 		if (skb_copy_bits(skb, offset, page_address(page), len))
1478 			BUG();
1479 
1480 		offset += len;
1481 		frags[i].page.p = page;
1482 		frags[i].page_offset = 0;
1483 		skb_frag_size_set(&frags[i], len);
1484 	}
1485 	/* swap out with old one */
1486 	memcpy(skb_shinfo(skb)->frags,
1487 	       frags,
1488 	       i * sizeof(skb_frag_t));
1489 	skb_shinfo(skb)->nr_frags = i;
1490 	skb->truesize += i * PAGE_SIZE;
1491 
1492 	/* remove traces of mapped pages and frag_list */
1493 	skb_frag_list_init(skb);
1494 	uarg = skb_shinfo(skb)->destructor_arg;
1495 	uarg->callback(uarg, true);
1496 	skb_shinfo(skb)->destructor_arg = NULL;
1497 
1498 	skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
1499 	kfree_skb(nskb);
1500 
1501 	return 0;
1502 }
1503 
1504 static int xenvif_tx_submit(struct xenvif_queue *queue)
1505 {
1506 	struct gnttab_map_grant_ref *gop_map = queue->tx_map_ops;
1507 	struct gnttab_copy *gop_copy = queue->tx_copy_ops;
1508 	struct sk_buff *skb;
1509 	int work_done = 0;
1510 
1511 	while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
1512 		struct xen_netif_tx_request *txp;
1513 		u16 pending_idx;
1514 		unsigned data_len;
1515 
1516 		pending_idx = XENVIF_TX_CB(skb)->pending_idx;
1517 		txp = &queue->pending_tx_info[pending_idx].req;
1518 
1519 		/* Check the remap error code. */
1520 		if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) {
1521 			skb_shinfo(skb)->nr_frags = 0;
1522 			kfree_skb(skb);
1523 			continue;
1524 		}
1525 
1526 		data_len = skb->len;
1527 		callback_param(queue, pending_idx).ctx = NULL;
1528 		if (data_len < txp->size) {
1529 			/* Append the packet payload as a fragment. */
1530 			txp->offset += data_len;
1531 			txp->size -= data_len;
1532 		} else {
1533 			/* Schedule a response immediately. */
1534 			xenvif_idx_release(queue, pending_idx,
1535 					   XEN_NETIF_RSP_OKAY);
1536 		}
1537 
1538 		if (txp->flags & XEN_NETTXF_csum_blank)
1539 			skb->ip_summed = CHECKSUM_PARTIAL;
1540 		else if (txp->flags & XEN_NETTXF_data_validated)
1541 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1542 
1543 		xenvif_fill_frags(queue, skb);
1544 
1545 		if (unlikely(skb_has_frag_list(skb))) {
1546 			if (xenvif_handle_frag_list(queue, skb)) {
1547 				if (net_ratelimit())
1548 					netdev_err(queue->vif->dev,
1549 						   "Not enough memory to consolidate frag_list!\n");
1550 				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
1551 				kfree_skb(skb);
1552 				continue;
1553 			}
1554 		}
1555 
1556 		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
1557 			int target = min_t(int, skb->len, PKT_PROT_LEN);
1558 			__pskb_pull_tail(skb, target - skb_headlen(skb));
1559 		}
1560 
1561 		skb->dev      = queue->vif->dev;
1562 		skb->protocol = eth_type_trans(skb, skb->dev);
1563 		skb_reset_network_header(skb);
1564 
1565 		if (checksum_setup(queue, skb)) {
1566 			netdev_dbg(queue->vif->dev,
1567 				   "Can't setup checksum in net_tx_action\n");
1568 			/* We have to set this flag to trigger the callback */
1569 			if (skb_shinfo(skb)->destructor_arg)
1570 				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
1571 			kfree_skb(skb);
1572 			continue;
1573 		}
1574 
1575 		skb_probe_transport_header(skb, 0);
1576 
1577 		/* If the packet is GSO then we will have just set up the
1578 		 * transport header offset in checksum_setup so it's now
1579 		 * straightforward to calculate gso_segs.
1580 		 */
1581 		if (skb_is_gso(skb)) {
1582 			int mss = skb_shinfo(skb)->gso_size;
1583 			int hdrlen = skb_transport_header(skb) -
1584 				skb_mac_header(skb) +
1585 				tcp_hdrlen(skb);
1586 
1587 			skb_shinfo(skb)->gso_segs =
1588 				DIV_ROUND_UP(skb->len - hdrlen, mss);
1589 		}
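		/* e.g. a 10000-byte GSO skb with a 1448-byte MSS and 54 bytes
		 * of Ethernet + IPv4 + TCP headers gives
		 * DIV_ROUND_UP(10000 - 54, 1448) == 7 segments.
		 */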
1590 
1591 		queue->stats.rx_bytes += skb->len;
1592 		queue->stats.rx_packets++;
1593 
1594 		work_done++;
1595 
1596 		/* Set this flag right before netif_receive_skb, otherwise
1597 		 * someone might think this packet already left netback, and
1598 		 * do a skb_copy_ubufs while we are still in control of the
1599 		 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
1600 		 */
1601 		if (skb_shinfo(skb)->destructor_arg) {
1602 			skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
1603 			queue->stats.tx_zerocopy_sent++;
1604 		}
1605 
1606 		netif_receive_skb(skb);
1607 	}
1608 
1609 	return work_done;
1610 }
1611 
1612 void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
1613 {
1614 	unsigned long flags;
1615 	pending_ring_idx_t index;
1616 	struct xenvif_queue *queue = ubuf_to_queue(ubuf);
1617 
1618 	/* This is the only place where we grab this lock, to protect callbacks
1619 	 * from each other.
1620 	 */
1621 	spin_lock_irqsave(&queue->callback_lock, flags);
1622 	do {
1623 		u16 pending_idx = ubuf->desc;
1624 		ubuf = (struct ubuf_info *) ubuf->ctx;
1625 		BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
1626 			MAX_PENDING_REQS);
1627 		index = pending_index(queue->dealloc_prod);
1628 		queue->dealloc_ring[index] = pending_idx;
1629 		/* Sync with xenvif_tx_dealloc_action:
1630 		 * insert idx then incr producer.
1631 		 */
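		/* This write barrier pairs with the smp_rmb() in
		 * xenvif_tx_dealloc_action() below.
		 */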
1632 		smp_wmb();
1633 		queue->dealloc_prod++;
1634 	} while (ubuf);
1635 	wake_up(&queue->dealloc_wq);
1636 	spin_unlock_irqrestore(&queue->callback_lock, flags);
1637 
1638 	if (likely(zerocopy_success))
1639 		queue->stats.tx_zerocopy_success++;
1640 	else
1641 		queue->stats.tx_zerocopy_fail++;
1642 }
1643 
1644 static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
1645 {
1646 	struct gnttab_unmap_grant_ref *gop;
1647 	pending_ring_idx_t dc, dp;
1648 	u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
1649 	unsigned int i = 0;
1650 
1651 	dc = queue->dealloc_cons;
1652 	gop = queue->tx_unmap_ops;
1653 
1654 	/* Free up any grants we have finished using */
1655 	do {
1656 		dp = queue->dealloc_prod;
1657 
		/* Ensure we see all indices enqueued by all
		 * xenvif_zerocopy_callback() invocations.
		 */
1661 		smp_rmb();
1662 
1663 		while (dc != dp) {
1664 			BUG_ON(gop - queue->tx_unmap_ops > MAX_PENDING_REQS);
1665 			pending_idx =
1666 				queue->dealloc_ring[pending_index(dc++)];
1667 
1668 			pending_idx_release[gop-queue->tx_unmap_ops] =
1669 				pending_idx;
1670 			queue->pages_to_unmap[gop-queue->tx_unmap_ops] =
1671 				queue->mmap_pages[pending_idx];
1672 			gnttab_set_unmap_op(gop,
1673 					    idx_to_kaddr(queue, pending_idx),
1674 					    GNTMAP_host_map,
1675 					    queue->grant_tx_handle[pending_idx]);
1676 			xenvif_grant_handle_reset(queue, pending_idx);
1677 			++gop;
1678 		}
1679 
1680 	} while (dp != queue->dealloc_prod);
1681 
1682 	queue->dealloc_cons = dc;
1683 
1684 	if (gop - queue->tx_unmap_ops > 0) {
1685 		int ret;
1686 		ret = gnttab_unmap_refs(queue->tx_unmap_ops,
1687 					NULL,
1688 					queue->pages_to_unmap,
1689 					gop - queue->tx_unmap_ops);
1690 		if (ret) {
1691 			netdev_err(queue->vif->dev, "Unmap fail: nr_ops %tx ret %d\n",
1692 				   gop - queue->tx_unmap_ops, ret);
1693 			for (i = 0; i < gop - queue->tx_unmap_ops; ++i) {
1694 				if (gop[i].status != GNTST_okay)
1695 					netdev_err(queue->vif->dev,
1696 						   " host_addr: %llx handle: %x status: %d\n",
1697 						   gop[i].host_addr,
1698 						   gop[i].handle,
1699 						   gop[i].status);
1700 			}
1701 			BUG();
1702 		}
1703 	}
1704 
1705 	for (i = 0; i < gop - queue->tx_unmap_ops; ++i)
1706 		xenvif_idx_release(queue, pending_idx_release[i],
1707 				   XEN_NETIF_RSP_OKAY);
1708 }
1709 
1710 
1711 /* Called after netfront has transmitted */
1712 int xenvif_tx_action(struct xenvif_queue *queue, int budget)
1713 {
1714 	unsigned nr_mops, nr_cops = 0;
1715 	int work_done, ret;
1716 
1717 	if (unlikely(!tx_work_todo(queue)))
1718 		return 0;
1719 
1720 	xenvif_tx_build_gops(queue, budget, &nr_cops, &nr_mops);
1721 
1722 	if (nr_cops == 0)
1723 		return 0;
1724 
1725 	gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
1726 	if (nr_mops != 0) {
1727 		ret = gnttab_map_refs(queue->tx_map_ops,
1728 				      NULL,
1729 				      queue->pages_to_map,
1730 				      nr_mops);
1731 		BUG_ON(ret);
1732 	}
1733 
1734 	work_done = xenvif_tx_submit(queue);
1735 
1736 	return work_done;
1737 }
1738 
1739 static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
1740 			       u8 status)
1741 {
1742 	struct pending_tx_info *pending_tx_info;
1743 	pending_ring_idx_t index;
1744 	unsigned long flags;
1745 
1746 	pending_tx_info = &queue->pending_tx_info[pending_idx];
1747 	spin_lock_irqsave(&queue->response_lock, flags);
1748 	make_tx_response(queue, &pending_tx_info->req, status);
1749 	index = pending_index(queue->pending_prod);
1750 	queue->pending_ring[index] = pending_idx;
1751 	/* TX shouldn't use the index before we give it back here */
1752 	mb();
1753 	queue->pending_prod++;
1754 	spin_unlock_irqrestore(&queue->response_lock, flags);
1755 }
1756 
1757 
1758 static void make_tx_response(struct xenvif_queue *queue,
1759 			     struct xen_netif_tx_request *txp,
1760 			     s8       st)
1761 {
1762 	RING_IDX i = queue->tx.rsp_prod_pvt;
1763 	struct xen_netif_tx_response *resp;
1764 	int notify;
1765 
1766 	resp = RING_GET_RESPONSE(&queue->tx, i);
1767 	resp->id     = txp->id;
1768 	resp->status = st;
1769 
1770 	if (txp->flags & XEN_NETTXF_extra_info)
1771 		RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;
1772 
1773 	queue->tx.rsp_prod_pvt = ++i;
1774 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
1775 	if (notify)
1776 		notify_remote_via_irq(queue->tx_irq);
1777 }
1778 
1779 static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
1780 					     u16      id,
1781 					     s8       st,
1782 					     u16      offset,
1783 					     u16      size,
1784 					     u16      flags)
1785 {
1786 	RING_IDX i = queue->rx.rsp_prod_pvt;
1787 	struct xen_netif_rx_response *resp;
1788 
1789 	resp = RING_GET_RESPONSE(&queue->rx, i);
1790 	resp->offset     = offset;
1791 	resp->flags      = flags;
1792 	resp->id         = id;
1793 	resp->status     = (s16)size;
1794 	if (st < 0)
1795 		resp->status = (s16)st;
1796 
1797 	queue->rx.rsp_prod_pvt = ++i;
1798 
1799 	return resp;
1800 }
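/* Note on the encoding above: the status field of an RX response doubles as
 * the byte count on success, so a 1500-byte completion carries status 1500,
 * while a negative st (e.g. XEN_NETIF_RSP_ERROR) overwrites it with the
 * error code.
 */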
1801 
1802 void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
1803 {
1804 	int ret;
1805 	struct gnttab_unmap_grant_ref tx_unmap_op;
1806 
1807 	gnttab_set_unmap_op(&tx_unmap_op,
1808 			    idx_to_kaddr(queue, pending_idx),
1809 			    GNTMAP_host_map,
1810 			    queue->grant_tx_handle[pending_idx]);
1811 	xenvif_grant_handle_reset(queue, pending_idx);
1812 
1813 	ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
1814 				&queue->mmap_pages[pending_idx], 1);
1815 	if (ret) {
1816 		netdev_err(queue->vif->dev,
1817 			   "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: %x status: %d\n",
1818 			   ret,
1819 			   pending_idx,
1820 			   tx_unmap_op.host_addr,
1821 			   tx_unmap_op.handle,
1822 			   tx_unmap_op.status);
1823 		BUG();
1824 	}
1825 
1826 	xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_OKAY);
1827 }
1828 
1829 static inline int rx_work_todo(struct xenvif_queue *queue)
1830 {
1831 	return (!skb_queue_empty(&queue->rx_queue) &&
1832 	       xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots)) ||
1833 	       queue->rx_queue_purge;
1834 }
1835 
1836 static inline int tx_work_todo(struct xenvif_queue *queue)
1837 {
1838 	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
1839 		return 1;
1840 
1841 	return 0;
1842 }
1843 
1844 static inline bool tx_dealloc_work_todo(struct xenvif_queue *queue)
1845 {
1846 	return queue->dealloc_cons != queue->dealloc_prod;
1847 }
1848 
1849 void xenvif_unmap_frontend_rings(struct xenvif_queue *queue)
1850 {
1851 	if (queue->tx.sring)
1852 		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
1853 					queue->tx.sring);
1854 	if (queue->rx.sring)
1855 		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
1856 					queue->rx.sring);
1857 }
1858 
1859 int xenvif_map_frontend_rings(struct xenvif_queue *queue,
1860 			      grant_ref_t tx_ring_ref,
1861 			      grant_ref_t rx_ring_ref)
1862 {
1863 	void *addr;
1864 	struct xen_netif_tx_sring *txs;
1865 	struct xen_netif_rx_sring *rxs;
1866 
1867 	int err = -ENOMEM;
1868 
1869 	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
1870 				     tx_ring_ref, &addr);
1871 	if (err)
1872 		goto err;
1873 
1874 	txs = (struct xen_netif_tx_sring *)addr;
1875 	BACK_RING_INIT(&queue->tx, txs, PAGE_SIZE);
1876 
1877 	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
1878 				     rx_ring_ref, &addr);
1879 	if (err)
1880 		goto err;
1881 
1882 	rxs = (struct xen_netif_rx_sring *)addr;
1883 	BACK_RING_INIT(&queue->rx, rxs, PAGE_SIZE);
1884 
1885 	return 0;
1886 
1887 err:
1888 	xenvif_unmap_frontend_rings(queue);
1889 	return err;
1890 }
1891 
1892 static void xenvif_start_queue(struct xenvif_queue *queue)
1893 {
1894 	if (xenvif_schedulable(queue->vif))
1895 		xenvif_wake_queue(queue);
1896 }
1897 
1898 int xenvif_kthread_guest_rx(void *data)
1899 {
1900 	struct xenvif_queue *queue = data;
1901 	struct sk_buff *skb;
1902 
1903 	while (!kthread_should_stop()) {
1904 		wait_event_interruptible(queue->wq,
1905 					 rx_work_todo(queue) ||
1906 					 queue->vif->disabled ||
1907 					 kthread_should_stop());
1908 
		/* If this frontend was found to be rogue, disable it in
		 * kthread context. Currently this is only set when
		 * netback finds out the frontend sent a malformed packet,
		 * but we cannot disable the interface in softirq
		 * context, so we defer it here, provided this thread is
		 * associated with queue 0.
		 */
1916 		if (unlikely(queue->vif->disabled && netif_carrier_ok(queue->vif->dev) && queue->id == 0))
1917 			xenvif_carrier_off(queue->vif);
1918 
1919 		if (kthread_should_stop())
1920 			break;
1921 
1922 		if (queue->rx_queue_purge) {
1923 			skb_queue_purge(&queue->rx_queue);
1924 			queue->rx_queue_purge = false;
1925 		}
1926 
1927 		if (!skb_queue_empty(&queue->rx_queue))
1928 			xenvif_rx_action(queue);
1929 
1930 		if (skb_queue_empty(&queue->rx_queue) &&
1931 		    xenvif_queue_stopped(queue)) {
1932 			del_timer_sync(&queue->wake_queue);
1933 			xenvif_start_queue(queue);
1934 		}
1935 
1936 		cond_resched();
1937 	}
1938 
1939 	/* Bin any remaining skbs */
1940 	while ((skb = skb_dequeue(&queue->rx_queue)) != NULL)
1941 		dev_kfree_skb(skb);
1942 
1943 	return 0;
1944 }
1945 
1946 int xenvif_dealloc_kthread(void *data)
1947 {
1948 	struct xenvif_queue *queue = data;
1949 
1950 	while (!kthread_should_stop()) {
1951 		wait_event_interruptible(queue->dealloc_wq,
1952 					 tx_dealloc_work_todo(queue) ||
1953 					 kthread_should_stop());
1954 		if (kthread_should_stop())
1955 			break;
1956 
1957 		xenvif_tx_dealloc_action(queue);
1958 		cond_resched();
1959 	}
1960 
	/* Unmap anything remaining */
1962 	if (tx_dealloc_work_todo(queue))
1963 		xenvif_tx_dealloc_action(queue);
1964 
1965 	return 0;
1966 }
1967 
1968 static int __init netback_init(void)
1969 {
1970 	int rc = 0;
1971 
1972 	if (!xen_domain())
1973 		return -ENODEV;
1974 
1975 	/* Allow as many queues as there are CPUs, by default */
1976 	xenvif_max_queues = num_online_cpus();
1977 
1978 	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
1979 		pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
1980 			fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
1981 		fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
1982 	}
1983 
1984 	rc = xenvif_xenbus_init();
1985 	if (rc)
1986 		goto failed_init;
1987 
1988 	rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
1989 
1990 	return 0;
1991 
1992 failed_init:
1993 	return rc;
1994 }
1995 
1996 module_init(netback_init);
1997 
1998 static void __exit netback_fini(void)
1999 {
2000 	xenvif_xenbus_fini();
2001 }
2002 module_exit(netback_fini);
2003 
2004 MODULE_LICENSE("Dual BSD/GPL");
2005 MODULE_ALIAS("xen-backend:vif");
2006