1 /*
2  * Back-end of the driver for virtual network devices. This portion of the
3  * driver exports a 'unified' network-device interface that can be accessed
4  * by any operating system that implements a compatible front end. A
5  * reference front-end implementation can be found in:
6  *  drivers/net/xen-netfront.c
7  *
8  * Copyright (c) 2002-2005, K A Fraser
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License version 2
12  * as published by the Free Software Foundation; or, when distributed
13  * separately from the Linux kernel or incorporated into other
14  * software packages, subject to the following license:
15  *
16  * Permission is hereby granted, free of charge, to any person obtaining a copy
17  * of this source file (the "Software"), to deal in the Software without
18  * restriction, including without limitation the rights to use, copy, modify,
19  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
20  * and to permit persons to whom the Software is furnished to do so, subject to
21  * the following conditions:
22  *
23  * The above copyright notice and this permission notice shall be included in
24  * all copies or substantial portions of the Software.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32  * IN THE SOFTWARE.
33  */
34 
35 #include "common.h"
36 
37 #include <linux/kthread.h>
38 #include <linux/if_vlan.h>
39 #include <linux/udp.h>
40 
41 #include <net/tcp.h>
42 
43 #include <xen/xen.h>
44 #include <xen/events.h>
45 #include <xen/interface/memory.h>
46 
47 #include <asm/xen/hypercall.h>
48 #include <asm/xen/page.h>
49 
50 /* Provide an option to disable split event channels at load time as
 * event channels are a limited resource. Split event channels are
52  * enabled by default.
53  */
bool separate_tx_rx_irq = true;
55 module_param(separate_tx_rx_irq, bool, 0644);
56 
57 /*
 * This is the maximum number of slots an skb can use. If a guest sends
 * an skb that exceeds this limit it is considered malicious.
60  */
61 #define FATAL_SKB_SLOTS_DEFAULT 20
62 static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
63 module_param(fatal_skb_slots, uint, 0444);
64 
65 /*
 * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX as the
 * maximum number of slots a valid packet can use. This value is
 * currently defined to be XEN_NETIF_NR_SLOTS_MIN, which is supposed
 * to be supported by all backends.
70  */
71 #define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN
72 
73 /*
 * If head != INVALID_PENDING_RING_IDX, this tx request is the head of
 * one or more merged tx requests; otherwise it is a continuation of
 * the previous tx request.
77  */
78 static inline int pending_tx_is_head(struct xenvif *vif, RING_IDX idx)
79 {
80 	return vif->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX;
81 }
82 
83 static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
84 			       u8 status);
85 
86 static void make_tx_response(struct xenvif *vif,
87 			     struct xen_netif_tx_request *txp,
88 			     s8       st);
89 
90 static inline int tx_work_todo(struct xenvif *vif);
91 static inline int rx_work_todo(struct xenvif *vif);
92 
93 static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
94 					     u16      id,
95 					     s8       st,
96 					     u16      offset,
97 					     u16      size,
98 					     u16      flags);
99 
100 static inline unsigned long idx_to_pfn(struct xenvif *vif,
101 				       u16 idx)
102 {
103 	return page_to_pfn(vif->mmap_pages[idx]);
104 }
105 
106 static inline unsigned long idx_to_kaddr(struct xenvif *vif,
107 					 u16 idx)
108 {
109 	return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
110 }
111 
/* This is a minimum size for the linear area to avoid lots of
113  * calls to __pskb_pull_tail() as we set up checksum offsets. The
114  * value 128 was chosen as it covers all IPv4 and most likely
115  * IPv6 headers.
116  */
117 #define PKT_PROT_LEN 128
118 
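/*
 * While an skb is being built from tx requests, the page_offset field
 * of each fragment is borrowed to stash the pending ring index of the
 * request backing it. xenvif_fill_frags() later replaces this with the
 * real page, offset and size.
 */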
119 static u16 frag_get_pending_idx(skb_frag_t *frag)
120 {
121 	return (u16)frag->page_offset;
122 }
123 
124 static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
125 {
126 	frag->page_offset = pending_idx;
127 }
128 
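/*
 * The pending ring is indexed with a simple mask and therefore relies
 * on MAX_PENDING_REQS being a power of two. nr_pending_reqs() is the
 * number of pending slots currently in use, i.e. consumed from the
 * ring but not yet released by xenvif_idx_release().
 */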
129 static inline pending_ring_idx_t pending_index(unsigned i)
130 {
131 	return i & (MAX_PENDING_REQS-1);
132 }
133 
134 static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif)
135 {
136 	return MAX_PENDING_REQS -
137 		vif->pending_prod + vif->pending_cons;
138 }
139 
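/*
 * Check whether the shared RX ring has at least 'needed' unconsumed
 * request slots. If it does not, arm req_event so the frontend sends
 * an interrupt when it posts more requests, then re-check in case
 * requests arrived while the event was being set.
 */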
140 bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed)
141 {
142 	RING_IDX prod, cons;
143 
144 	do {
145 		prod = vif->rx.sring->req_prod;
146 		cons = vif->rx.req_cons;
147 
148 		if (prod - cons >= needed)
149 			return true;
150 
151 		vif->rx.sring->req_event = prod + 1;
152 
153 		/* Make sure event is visible before we check prod
154 		 * again.
155 		 */
156 		mb();
157 	} while (vif->rx.sring->req_prod != prod);
158 
159 	return false;
160 }
161 
162 /*
163  * Returns true if we should start a new receive buffer instead of
164  * adding 'size' bytes to a buffer which currently contains 'offset'
165  * bytes.
166  */
167 static bool start_new_rx_buffer(int offset, unsigned long size, int head)
168 {
169 	/* simple case: we have completely filled the current buffer. */
170 	if (offset == MAX_BUFFER_OFFSET)
171 		return true;
172 
173 	/*
174 	 * complex case: start a fresh buffer if the current frag
175 	 * would overflow the current buffer but only if:
176 	 *     (i)   this frag would fit completely in the next buffer
177 	 * and (ii)  there is already some data in the current buffer
178 	 * and (iii) this is not the head buffer.
179 	 *
180 	 * Where:
181 	 * - (i) stops us splitting a frag into two copies
182 	 *   unless the frag is too large for a single buffer.
183 	 * - (ii) stops us from leaving a buffer pointlessly empty.
184 	 * - (iii) stops us leaving the first buffer
185 	 *   empty. Strictly speaking this is already covered
186 	 *   by (ii) but is explicitly checked because
187 	 *   netfront relies on the first buffer being
188 	 *   non-empty and can crash otherwise.
189 	 *
	 * This means we will effectively linearise small
	 * frags but do not needlessly split large buffers
	 * into multiple copies; large frags tend to get
	 * their own buffers as before.
194 	 */
195 	if ((offset + size > MAX_BUFFER_OFFSET) &&
196 	    (size <= MAX_BUFFER_OFFSET) && offset && !head)
197 		return true;
198 
199 	return false;
200 }
201 
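/*
 * Book-keeping for a batch of RX work: producer/consumer indices into
 * the grant copy and meta arrays, plus the offset into and grant
 * reference of the frontend buffer currently being filled.
 */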
202 struct netrx_pending_operations {
203 	unsigned copy_prod, copy_cons;
204 	unsigned meta_prod, meta_cons;
205 	struct gnttab_copy *copy;
206 	struct xenvif_rx_meta *meta;
207 	int copy_off;
208 	grant_ref_t copy_gref;
209 };
210 
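/*
 * Consume the next RX request from the frontend and start a fresh,
 * empty meta slot for it.
 */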
211 static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
212 						 struct netrx_pending_operations *npo)
213 {
214 	struct xenvif_rx_meta *meta;
215 	struct xen_netif_rx_request *req;
216 
217 	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
218 
219 	meta = npo->meta + npo->meta_prod++;
220 	meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
221 	meta->gso_size = 0;
222 	meta->size = 0;
223 	meta->id = req->id;
224 
225 	npo->copy_off = 0;
226 	npo->copy_gref = req->gref;
227 
228 	return meta;
229 }
230 
231 /*
 * Set up the grant copy operations needed to transfer this fragment
 * into the frontend's receive buffers.
234  */
235 static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
236 				 struct netrx_pending_operations *npo,
237 				 struct page *page, unsigned long size,
238 				 unsigned long offset, int *head)
239 {
240 	struct gnttab_copy *copy_gop;
241 	struct xenvif_rx_meta *meta;
242 	unsigned long bytes;
243 	int gso_type;
244 
	/* Data must not overrun the end of the (possibly compound) page. */
246 	BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));
247 
248 	meta = npo->meta + npo->meta_prod - 1;
249 
	/* Skip unused pages at the start of a compound page. */
251 	page += offset >> PAGE_SHIFT;
252 	offset &= ~PAGE_MASK;
253 
254 	while (size > 0) {
255 		BUG_ON(offset >= PAGE_SIZE);
256 		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
257 
258 		bytes = PAGE_SIZE - offset;
259 
260 		if (bytes > size)
261 			bytes = size;
262 
263 		if (start_new_rx_buffer(npo->copy_off, bytes, *head)) {
264 			/*
265 			 * Netfront requires there to be some data in the head
266 			 * buffer.
267 			 */
268 			BUG_ON(*head);
269 
270 			meta = get_next_rx_buffer(vif, npo);
271 		}
272 
273 		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
274 			bytes = MAX_BUFFER_OFFSET - npo->copy_off;
275 
276 		copy_gop = npo->copy + npo->copy_prod++;
277 		copy_gop->flags = GNTCOPY_dest_gref;
278 		copy_gop->len = bytes;
279 
280 		copy_gop->source.domid = DOMID_SELF;
281 		copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
282 		copy_gop->source.offset = offset;
283 
284 		copy_gop->dest.domid = vif->domid;
285 		copy_gop->dest.offset = npo->copy_off;
286 		copy_gop->dest.u.ref = npo->copy_gref;
287 
288 		npo->copy_off += bytes;
289 		meta->size += bytes;
290 
291 		offset += bytes;
292 		size -= bytes;
293 
		/* Move on to the next page of the compound page. */
295 		if (offset == PAGE_SIZE && size) {
296 			BUG_ON(!PageCompound(page));
297 			page++;
298 			offset = 0;
299 		}
300 
301 		/* Leave a gap for the GSO descriptor. */
302 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
303 			gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
304 		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
305 			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
306 		else
307 			gso_type = XEN_NETIF_GSO_TYPE_NONE;
308 
309 		if (*head && ((1 << gso_type) & vif->gso_mask))
310 			vif->rx.req_cons++;
311 
312 		*head = 0; /* There must be something in this buffer now. */
313 
314 	}
315 }
316 
317 /*
318  * Prepare an SKB to be transmitted to the frontend.
319  *
320  * This function is responsible for allocating grant operations, meta
321  * structures, etc.
322  *
323  * It returns the number of meta structures consumed. The number of
324  * ring slots used is always equal to the number of meta slots used
325  * plus the number of GSO descriptors used. Currently, we use either
326  * zero GSO descriptors (for non-GSO packets) or one descriptor (for
327  * frontend-side LRO).
328  */
329 static int xenvif_gop_skb(struct sk_buff *skb,
330 			  struct netrx_pending_operations *npo)
331 {
332 	struct xenvif *vif = netdev_priv(skb->dev);
333 	int nr_frags = skb_shinfo(skb)->nr_frags;
334 	int i;
335 	struct xen_netif_rx_request *req;
336 	struct xenvif_rx_meta *meta;
337 	unsigned char *data;
338 	int head = 1;
339 	int old_meta_prod;
340 	int gso_type;
341 	int gso_size;
342 
343 	old_meta_prod = npo->meta_prod;
344 
345 	if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
346 		gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
347 		gso_size = skb_shinfo(skb)->gso_size;
348 	} else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
349 		gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
350 		gso_size = skb_shinfo(skb)->gso_size;
351 	} else {
352 		gso_type = XEN_NETIF_GSO_TYPE_NONE;
353 		gso_size = 0;
354 	}
355 
356 	/* Set up a GSO prefix descriptor, if necessary */
357 	if ((1 << gso_type) & vif->gso_prefix_mask) {
358 		req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
359 		meta = npo->meta + npo->meta_prod++;
360 		meta->gso_type = gso_type;
361 		meta->gso_size = gso_size;
362 		meta->size = 0;
363 		meta->id = req->id;
364 	}
365 
366 	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
367 	meta = npo->meta + npo->meta_prod++;
368 
369 	if ((1 << gso_type) & vif->gso_mask) {
370 		meta->gso_type = gso_type;
371 		meta->gso_size = gso_size;
372 	} else {
373 		meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
374 		meta->gso_size = 0;
375 	}
376 
377 	meta->size = 0;
378 	meta->id = req->id;
379 	npo->copy_off = 0;
380 	npo->copy_gref = req->gref;
381 
382 	data = skb->data;
383 	while (data < skb_tail_pointer(skb)) {
384 		unsigned int offset = offset_in_page(data);
385 		unsigned int len = PAGE_SIZE - offset;
386 
387 		if (data + len > skb_tail_pointer(skb))
388 			len = skb_tail_pointer(skb) - data;
389 
390 		xenvif_gop_frag_copy(vif, skb, npo,
391 				     virt_to_page(data), len, offset, &head);
392 		data += len;
393 	}
394 
395 	for (i = 0; i < nr_frags; i++) {
396 		xenvif_gop_frag_copy(vif, skb, npo,
397 				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
398 				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
399 				     skb_shinfo(skb)->frags[i].page_offset,
400 				     &head);
401 	}
402 
403 	return npo->meta_prod - old_meta_prod;
404 }
405 
406 /*
407  * This is a twin to xenvif_gop_skb.  Assume that xenvif_gop_skb was
408  * used to set up the operations on the top of
409  * netrx_pending_operations, which have since been done.  Check that
410  * they didn't give any errors and advance over them.
411  */
412 static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
413 			    struct netrx_pending_operations *npo)
414 {
415 	struct gnttab_copy     *copy_op;
416 	int status = XEN_NETIF_RSP_OKAY;
417 	int i;
418 
419 	for (i = 0; i < nr_meta_slots; i++) {
420 		copy_op = npo->copy + npo->copy_cons++;
421 		if (copy_op->status != GNTST_okay) {
422 			netdev_dbg(vif->dev,
423 				   "Bad status %d from copy to DOM%d.\n",
424 				   copy_op->status, vif->domid);
425 			status = XEN_NETIF_RSP_ERROR;
426 		}
427 	}
428 
429 	return status;
430 }
431 
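/*
 * Generate RX responses for the fragment meta slots that follow the
 * head slot, setting XEN_NETRXF_more_data on all but the last one.
 */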
432 static void xenvif_add_frag_responses(struct xenvif *vif, int status,
433 				      struct xenvif_rx_meta *meta,
434 				      int nr_meta_slots)
435 {
436 	int i;
437 	unsigned long offset;
438 
439 	/* No fragments used */
440 	if (nr_meta_slots <= 1)
441 		return;
442 
443 	nr_meta_slots--;
444 
445 	for (i = 0; i < nr_meta_slots; i++) {
446 		int flags;
447 		if (i == nr_meta_slots - 1)
448 			flags = 0;
449 		else
450 			flags = XEN_NETRXF_more_data;
451 
452 		offset = 0;
453 		make_rx_response(vif, meta[i].id, status, offset,
454 				 meta[i].size, flags);
455 	}
456 }
457 
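/*
 * Overlaid on skb->cb to carry the number of meta slots consumed by
 * xenvif_gop_skb() through to response generation.
 */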
458 struct skb_cb_overlay {
459 	int meta_slots_used;
460 };
461 
462 void xenvif_kick_thread(struct xenvif *vif)
463 {
464 	wake_up(&vif->wq);
465 }
466 
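/*
 * Drain vif->rx_queue: estimate the worst-case slot usage of each skb,
 * build grant copy and meta entries for those that fit, issue the
 * copies as one batch and turn the results into RX responses. An skb
 * that does not fit is requeued and processing stops until the
 * frontend makes more requests available.
 */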
467 static void xenvif_rx_action(struct xenvif *vif)
468 {
469 	s8 status;
470 	u16 flags;
471 	struct xen_netif_rx_response *resp;
472 	struct sk_buff_head rxq;
473 	struct sk_buff *skb;
474 	LIST_HEAD(notify);
475 	int ret;
476 	unsigned long offset;
477 	struct skb_cb_overlay *sco;
478 	bool need_to_notify = false;
479 	bool ring_full = false;
480 
481 	struct netrx_pending_operations npo = {
482 		.copy  = vif->grant_copy_op,
483 		.meta  = vif->meta,
484 	};
485 
486 	skb_queue_head_init(&rxq);
487 
488 	while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
489 		int max_slots_needed;
490 		int i;
491 
		/* We need a cheap worst-case estimate for the number of
493 		 * slots we'll use.
494 		 */
495 
496 		max_slots_needed = DIV_ROUND_UP(offset_in_page(skb->data) +
497 						skb_headlen(skb),
498 						PAGE_SIZE);
499 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
500 			unsigned int size;
501 			size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
502 			max_slots_needed += DIV_ROUND_UP(size, PAGE_SIZE);
503 		}
504 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
505 		    skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
506 			max_slots_needed++;
507 
508 		/* If the skb may not fit then bail out now */
509 		if (!xenvif_rx_ring_slots_available(vif, max_slots_needed)) {
510 			skb_queue_head(&vif->rx_queue, skb);
511 			need_to_notify = true;
512 			ring_full = true;
513 			break;
514 		}
515 
516 		sco = (struct skb_cb_overlay *)skb->cb;
517 		sco->meta_slots_used = xenvif_gop_skb(skb, &npo);
518 		BUG_ON(sco->meta_slots_used > max_slots_needed);
519 
520 		__skb_queue_tail(&rxq, skb);
521 	}
522 
523 	BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta));
524 
525 	vif->rx_queue_stopped = !npo.copy_prod && ring_full;
526 
527 	if (!npo.copy_prod)
528 		goto done;
529 
530 	BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
531 	gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);
532 
533 	while ((skb = __skb_dequeue(&rxq)) != NULL) {
534 		sco = (struct skb_cb_overlay *)skb->cb;
535 
536 		if ((1 << vif->meta[npo.meta_cons].gso_type) &
537 		    vif->gso_prefix_mask) {
538 			resp = RING_GET_RESPONSE(&vif->rx,
539 						 vif->rx.rsp_prod_pvt++);
540 
541 			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
542 
543 			resp->offset = vif->meta[npo.meta_cons].gso_size;
544 			resp->id = vif->meta[npo.meta_cons].id;
545 			resp->status = sco->meta_slots_used;
546 
547 			npo.meta_cons++;
548 			sco->meta_slots_used--;
549 		}
550 
551 
552 		vif->dev->stats.tx_bytes += skb->len;
553 		vif->dev->stats.tx_packets++;
554 
555 		status = xenvif_check_gop(vif, sco->meta_slots_used, &npo);
556 
557 		if (sco->meta_slots_used == 1)
558 			flags = 0;
559 		else
560 			flags = XEN_NETRXF_more_data;
561 
562 		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
563 			flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
564 		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
565 			/* remote but checksummed. */
566 			flags |= XEN_NETRXF_data_validated;
567 
568 		offset = 0;
569 		resp = make_rx_response(vif, vif->meta[npo.meta_cons].id,
570 					status, offset,
571 					vif->meta[npo.meta_cons].size,
572 					flags);
573 
574 		if ((1 << vif->meta[npo.meta_cons].gso_type) &
575 		    vif->gso_mask) {
576 			struct xen_netif_extra_info *gso =
577 				(struct xen_netif_extra_info *)
578 				RING_GET_RESPONSE(&vif->rx,
579 						  vif->rx.rsp_prod_pvt++);
580 
581 			resp->flags |= XEN_NETRXF_extra_info;
582 
583 			gso->u.gso.type = vif->meta[npo.meta_cons].gso_type;
584 			gso->u.gso.size = vif->meta[npo.meta_cons].gso_size;
585 			gso->u.gso.pad = 0;
586 			gso->u.gso.features = 0;
587 
588 			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
589 			gso->flags = 0;
590 		}
591 
592 		xenvif_add_frag_responses(vif, status,
593 					  vif->meta + npo.meta_cons + 1,
594 					  sco->meta_slots_used);
595 
596 		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
597 
598 		need_to_notify |= !!ret;
599 
600 		npo.meta_cons += sco->meta_slots_used;
601 		dev_kfree_skb(skb);
602 	}
603 
604 done:
605 	if (need_to_notify)
606 		notify_remote_via_irq(vif->rx_irq);
607 }
608 
609 void xenvif_check_rx_xenvif(struct xenvif *vif)
610 {
611 	int more_to_do;
612 
613 	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
614 
615 	if (more_to_do)
616 		napi_schedule(&vif->napi);
617 }
618 
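/*
 * Top up the vif's transmit credit, capping it at one burst: the
 * larger of credit_bytes and the size of the next pending packet
 * (itself capped at 128kB).
 */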
619 static void tx_add_credit(struct xenvif *vif)
620 {
621 	unsigned long max_burst, max_credit;
622 
623 	/*
624 	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
625 	 * Otherwise the interface can seize up due to insufficient credit.
626 	 */
627 	max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
628 	max_burst = min(max_burst, 131072UL);
629 	max_burst = max(max_burst, vif->credit_bytes);
630 
631 	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
632 	max_credit = vif->remaining_credit + vif->credit_bytes;
633 	if (max_credit < vif->remaining_credit)
634 		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
635 
636 	vif->remaining_credit = min(max_credit, max_burst);
637 }
638 
639 static void tx_credit_callback(unsigned long data)
640 {
641 	struct xenvif *vif = (struct xenvif *)data;
642 	tx_add_credit(vif);
643 	xenvif_check_rx_xenvif(vif);
644 }
645 
646 static void xenvif_tx_err(struct xenvif *vif,
647 			  struct xen_netif_tx_request *txp, RING_IDX end)
648 {
649 	RING_IDX cons = vif->tx.req_cons;
650 
651 	do {
652 		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
653 		if (cons == end)
654 			break;
655 		txp = RING_GET_REQUEST(&vif->tx, cons++);
656 	} while (1);
657 	vif->tx.req_cons = cons;
658 }
659 
660 static void xenvif_fatal_tx_err(struct xenvif *vif)
661 {
662 	netdev_err(vif->dev, "fatal error; disabling device\n");
663 	xenvif_carrier_off(vif);
664 }
665 
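/*
 * Walk the chain of tx requests that follows 'first' (linked via
 * XEN_NETTXF_more_data), copying them into 'txp' and validating them.
 * Returns the number of extra slots consumed, or a negative error if
 * the packet must be dropped or the frontend misbehaved badly enough
 * to be disabled.
 */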
666 static int xenvif_count_requests(struct xenvif *vif,
667 				 struct xen_netif_tx_request *first,
668 				 struct xen_netif_tx_request *txp,
669 				 int work_to_do)
670 {
671 	RING_IDX cons = vif->tx.req_cons;
672 	int slots = 0;
673 	int drop_err = 0;
674 	int more_data;
675 
676 	if (!(first->flags & XEN_NETTXF_more_data))
677 		return 0;
678 
679 	do {
680 		struct xen_netif_tx_request dropped_tx = { 0 };
681 
682 		if (slots >= work_to_do) {
683 			netdev_err(vif->dev,
684 				   "Asked for %d slots but exceeds this limit\n",
685 				   work_to_do);
686 			xenvif_fatal_tx_err(vif);
687 			return -ENODATA;
688 		}
689 
		/* This guest is really using too many slots and is
		 * considered malicious.
692 		 */
693 		if (unlikely(slots >= fatal_skb_slots)) {
694 			netdev_err(vif->dev,
695 				   "Malicious frontend using %d slots, threshold %u\n",
696 				   slots, fatal_skb_slots);
697 			xenvif_fatal_tx_err(vif);
698 			return -E2BIG;
699 		}
700 
		/* The Xen network protocol had an implicit dependency on
		 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
		 * the historical MAX_SKB_FRAGS value 18 to honor the
		 * same behavior as before. Any packet using more than
		 * 18 slots but fewer than fatal_skb_slots slots is
		 * dropped.
707 		 */
708 		if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
709 			if (net_ratelimit())
710 				netdev_dbg(vif->dev,
711 					   "Too many slots (%d) exceeding limit (%d), dropping packet\n",
712 					   slots, XEN_NETBK_LEGACY_SLOTS_MAX);
713 			drop_err = -E2BIG;
714 		}
715 
716 		if (drop_err)
717 			txp = &dropped_tx;
718 
719 		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
720 		       sizeof(*txp));
721 
722 		/* If the guest submitted a frame >= 64 KiB then
723 		 * first->size overflowed and following slots will
724 		 * appear to be larger than the frame.
725 		 *
		 * This cannot be a fatal error as there are buggy
727 		 * frontends that do this.
728 		 *
729 		 * Consume all slots and drop the packet.
730 		 */
731 		if (!drop_err && txp->size > first->size) {
732 			if (net_ratelimit())
733 				netdev_dbg(vif->dev,
734 					   "Invalid tx request, slot size %u > remaining size %u\n",
735 					   txp->size, first->size);
736 			drop_err = -EIO;
737 		}
738 
739 		first->size -= txp->size;
740 		slots++;
741 
742 		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
743 			netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
744 				 txp->offset, txp->size);
745 			xenvif_fatal_tx_err(vif);
746 			return -EINVAL;
747 		}
748 
749 		more_data = txp->flags & XEN_NETTXF_more_data;
750 
751 		if (!drop_err)
752 			txp++;
753 
754 	} while (more_data);
755 
756 	if (drop_err) {
757 		xenvif_tx_err(vif, first, cons + slots);
758 		return drop_err;
759 	}
760 
761 	return slots;
762 }
763 
764 static struct page *xenvif_alloc_page(struct xenvif *vif,
765 				      u16 pending_idx)
766 {
767 	struct page *page;
768 
769 	page = alloc_page(GFP_ATOMIC|__GFP_COLD);
770 	if (!page)
771 		return NULL;
772 	vif->mmap_pages[pending_idx] = page;
773 
774 	return page;
775 }
776 
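/*
 * Build grant copy operations for the frag slots of an skb, coalescing
 * consecutive tx requests into freshly allocated backend pages so that
 * up to XEN_NETBK_LEGACY_SLOTS_MAX slots fit into at most
 * MAX_SKB_FRAGS fragments. Returns a pointer past the last copy
 * operation used, or NULL if a page allocation fails.
 */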
777 static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif,
778 					       struct sk_buff *skb,
779 					       struct xen_netif_tx_request *txp,
780 					       struct gnttab_copy *gop)
781 {
782 	struct skb_shared_info *shinfo = skb_shinfo(skb);
783 	skb_frag_t *frags = shinfo->frags;
784 	u16 pending_idx = *((u16 *)skb->data);
785 	u16 head_idx = 0;
786 	int slot, start;
787 	struct page *page;
788 	pending_ring_idx_t index, start_idx = 0;
789 	uint16_t dst_offset;
790 	unsigned int nr_slots;
791 	struct pending_tx_info *first = NULL;
792 
793 	/* At this point shinfo->nr_frags is in fact the number of
794 	 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
795 	 */
796 	nr_slots = shinfo->nr_frags;
797 
798 	/* Skip first skb fragment if it is on same page as header fragment. */
799 	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
800 
	/* Coalesce tx requests. At this point the packet passed in
802 	 * should be <= 64K. Any packets larger than 64K have been
803 	 * handled in xenvif_count_requests().
804 	 */
805 	for (shinfo->nr_frags = slot = start; slot < nr_slots;
806 	     shinfo->nr_frags++) {
807 		struct pending_tx_info *pending_tx_info =
808 			vif->pending_tx_info;
809 
810 		page = alloc_page(GFP_ATOMIC|__GFP_COLD);
811 		if (!page)
812 			goto err;
813 
814 		dst_offset = 0;
815 		first = NULL;
816 		while (dst_offset < PAGE_SIZE && slot < nr_slots) {
817 			gop->flags = GNTCOPY_source_gref;
818 
819 			gop->source.u.ref = txp->gref;
820 			gop->source.domid = vif->domid;
821 			gop->source.offset = txp->offset;
822 
823 			gop->dest.domid = DOMID_SELF;
824 
825 			gop->dest.offset = dst_offset;
826 			gop->dest.u.gmfn = virt_to_mfn(page_address(page));
827 
828 			if (dst_offset + txp->size > PAGE_SIZE) {
				/* This page can only merge a portion
				 * of the tx request. Do not increment
				 * any pointer / counter here. The txp
				 * will be dealt with in future rounds,
				 * eventually hitting the `else` branch.
835 				 */
836 				gop->len = PAGE_SIZE - dst_offset;
837 				txp->offset += gop->len;
838 				txp->size -= gop->len;
839 				dst_offset += gop->len; /* quit loop */
840 			} else {
841 				/* This tx request can be merged in the page */
842 				gop->len = txp->size;
843 				dst_offset += gop->len;
844 
845 				index = pending_index(vif->pending_cons++);
846 
847 				pending_idx = vif->pending_ring[index];
848 
849 				memcpy(&pending_tx_info[pending_idx].req, txp,
850 				       sizeof(*txp));
851 
				/* Poison these fields; the corresponding
				 * fields for the head tx req will be set
				 * to correct values after the loop.
855 				 */
856 				vif->mmap_pages[pending_idx] = (void *)(~0UL);
857 				pending_tx_info[pending_idx].head =
858 					INVALID_PENDING_RING_IDX;
859 
860 				if (!first) {
861 					first = &pending_tx_info[pending_idx];
862 					start_idx = index;
863 					head_idx = pending_idx;
864 				}
865 
866 				txp++;
867 				slot++;
868 			}
869 
870 			gop++;
871 		}
872 
873 		first->req.offset = 0;
874 		first->req.size = dst_offset;
875 		first->head = start_idx;
876 		vif->mmap_pages[head_idx] = page;
877 		frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
878 	}
879 
880 	BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);
881 
882 	return gop;
883 err:
884 	/* Unwind, freeing all pages and sending error responses. */
885 	while (shinfo->nr_frags-- > start) {
886 		xenvif_idx_release(vif,
887 				frag_get_pending_idx(&frags[shinfo->nr_frags]),
888 				XEN_NETIF_RSP_ERROR);
889 	}
890 	/* The head too, if necessary. */
891 	if (start)
892 		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
893 
894 	return NULL;
895 }
896 
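/*
 * Check the status of the grant copies backing an skb. On error,
 * release the affected pending slots: the failing slot gets an ERROR
 * response, while slots that copied successfully but belong to the
 * doomed packet are released with OKAY. Advances *gopp past the
 * checked operations and returns the first error encountered.
 */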
897 static int xenvif_tx_check_gop(struct xenvif *vif,
898 			       struct sk_buff *skb,
899 			       struct gnttab_copy **gopp)
900 {
901 	struct gnttab_copy *gop = *gopp;
902 	u16 pending_idx = *((u16 *)skb->data);
903 	struct skb_shared_info *shinfo = skb_shinfo(skb);
904 	struct pending_tx_info *tx_info;
905 	int nr_frags = shinfo->nr_frags;
906 	int i, err, start;
907 	u16 peek; /* peek into next tx request */
908 
909 	/* Check status of header. */
910 	err = gop->status;
911 	if (unlikely(err))
912 		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
913 
914 	/* Skip first skb fragment if it is on same page as header fragment. */
915 	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
916 
917 	for (i = start; i < nr_frags; i++) {
918 		int j, newerr;
919 		pending_ring_idx_t head;
920 
921 		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
922 		tx_info = &vif->pending_tx_info[pending_idx];
923 		head = tx_info->head;
924 
925 		/* Check error status: if okay then remember grant handle. */
926 		do {
927 			newerr = (++gop)->status;
928 			if (newerr)
929 				break;
930 			peek = vif->pending_ring[pending_index(++head)];
931 		} while (!pending_tx_is_head(vif, peek));
932 
933 		if (likely(!newerr)) {
934 			/* Had a previous error? Invalidate this fragment. */
935 			if (unlikely(err))
936 				xenvif_idx_release(vif, pending_idx,
937 						   XEN_NETIF_RSP_OKAY);
938 			continue;
939 		}
940 
941 		/* Error on this fragment: respond to client with an error. */
942 		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
943 
944 		/* Not the first error? Preceding frags already invalidated. */
945 		if (err)
946 			continue;
947 
948 		/* First error: invalidate header and preceding fragments. */
949 		pending_idx = *((u16 *)skb->data);
950 		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
951 		for (j = start; j < i; j++) {
952 			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
953 			xenvif_idx_release(vif, pending_idx,
954 					   XEN_NETIF_RSP_OKAY);
955 		}
956 
957 		/* Remember the error: invalidate all subsequent fragments. */
958 		err = newerr;
959 	}
960 
961 	*gopp = gop + 1;
962 	return err;
963 }
964 
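/*
 * Turn the stashed pending indices in each skb fragment back into real
 * page/offset/size descriptors and account the fragment data in the
 * skb. An extra page reference is taken so the page stays alive after
 * xenvif_idx_release() drops its reference.
 */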
965 static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
966 {
967 	struct skb_shared_info *shinfo = skb_shinfo(skb);
968 	int nr_frags = shinfo->nr_frags;
969 	int i;
970 
971 	for (i = 0; i < nr_frags; i++) {
972 		skb_frag_t *frag = shinfo->frags + i;
973 		struct xen_netif_tx_request *txp;
974 		struct page *page;
975 		u16 pending_idx;
976 
977 		pending_idx = frag_get_pending_idx(frag);
978 
979 		txp = &vif->pending_tx_info[pending_idx].req;
980 		page = virt_to_page(idx_to_kaddr(vif, pending_idx));
981 		__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
982 		skb->len += txp->size;
983 		skb->data_len += txp->size;
984 		skb->truesize += txp->size;
985 
986 		/* Take an extra reference to offset xenvif_idx_release */
987 		get_page(vif->mmap_pages[pending_idx]);
988 		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
989 	}
990 }
991 
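/*
 * Consume the chain of extra info slots that follows a tx request,
 * storing each one by type in 'extras'. Returns the remaining
 * work_to_do, or a negative error (disabling the vif) if the chain is
 * malformed.
 */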
992 static int xenvif_get_extras(struct xenvif *vif,
993 				struct xen_netif_extra_info *extras,
994 				int work_to_do)
995 {
996 	struct xen_netif_extra_info extra;
997 	RING_IDX cons = vif->tx.req_cons;
998 
999 	do {
1000 		if (unlikely(work_to_do-- <= 0)) {
1001 			netdev_err(vif->dev, "Missing extra info\n");
1002 			xenvif_fatal_tx_err(vif);
1003 			return -EBADR;
1004 		}
1005 
1006 		memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
1007 		       sizeof(extra));
1008 		if (unlikely(!extra.type ||
1009 			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1010 			vif->tx.req_cons = ++cons;
1011 			netdev_err(vif->dev,
1012 				   "Invalid extra type: %d\n", extra.type);
1013 			xenvif_fatal_tx_err(vif);
1014 			return -EINVAL;
1015 		}
1016 
1017 		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
1018 		vif->tx.req_cons = ++cons;
1019 	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
1020 
1021 	return work_to_do;
1022 }
1023 
1024 static int xenvif_set_skb_gso(struct xenvif *vif,
1025 			      struct sk_buff *skb,
1026 			      struct xen_netif_extra_info *gso)
1027 {
1028 	if (!gso->u.gso.size) {
1029 		netdev_err(vif->dev, "GSO size must not be zero.\n");
1030 		xenvif_fatal_tx_err(vif);
1031 		return -EINVAL;
1032 	}
1033 
1034 	switch (gso->u.gso.type) {
1035 	case XEN_NETIF_GSO_TYPE_TCPV4:
1036 		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1037 		break;
1038 	case XEN_NETIF_GSO_TYPE_TCPV6:
1039 		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
1040 		break;
1041 	default:
1042 		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
1043 		xenvif_fatal_tx_err(vif);
1044 		return -EINVAL;
1045 	}
1046 
1047 	skb_shinfo(skb)->gso_size = gso->u.gso.size;
1048 	/* gso_segs will be calculated later */
1049 
1050 	return 0;
1051 }
1052 
1053 static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
1054 {
1055 	bool recalculate_partial_csum = false;
1056 
1057 	/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
	 * peers can fail to set NETTXF_csum_blank when sending a GSO
1059 	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
1060 	 * recalculate the partial checksum.
1061 	 */
1062 	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
1063 		vif->rx_gso_checksum_fixup++;
1064 		skb->ip_summed = CHECKSUM_PARTIAL;
1065 		recalculate_partial_csum = true;
1066 	}
1067 
1068 	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
1069 	if (skb->ip_summed != CHECKSUM_PARTIAL)
1070 		return 0;
1071 
1072 	return skb_checksum_setup(skb, recalculate_partial_csum);
1073 }
1074 
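/*
 * Credit-based scheduling: return true if sending 'size' bytes now
 * would exceed the vif's remaining credit, arming the credit timer so
 * that transmission resumes when the next credit window opens.
 */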
1075 static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
1076 {
1077 	u64 now = get_jiffies_64();
1078 	u64 next_credit = vif->credit_window_start +
1079 		msecs_to_jiffies(vif->credit_usec / 1000);
1080 
1081 	/* Timer could already be pending in rare cases. */
1082 	if (timer_pending(&vif->credit_timeout))
1083 		return true;
1084 
1085 	/* Passed the point where we can replenish credit? */
1086 	if (time_after_eq64(now, next_credit)) {
1087 		vif->credit_window_start = now;
1088 		tx_add_credit(vif);
1089 	}
1090 
1091 	/* Still too big to send right now? Set a callback. */
1092 	if (size > vif->remaining_credit) {
1093 		vif->credit_timeout.data     =
1094 			(unsigned long)vif;
1095 		vif->credit_timeout.function =
1096 			tx_credit_callback;
1097 		mod_timer(&vif->credit_timeout,
1098 			  next_credit);
1099 		vif->credit_window_start = next_credit;
1100 
1101 		return true;
1102 	}
1103 
1104 	return false;
1105 }
1106 
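/*
 * First half of the tx path: pull up to 'budget' packets' worth of tx
 * requests off the shared ring, validate them and translate them into
 * grant copy operations and partially built skbs on vif->tx_queue.
 * Returns the number of copy operations produced.
 */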
1107 static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
1108 {
1109 	struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
1110 	struct sk_buff *skb;
1111 	int ret;
1112 
1113 	while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
1114 		< MAX_PENDING_REQS) &&
1115 	       (skb_queue_len(&vif->tx_queue) < budget)) {
1116 		struct xen_netif_tx_request txreq;
1117 		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
1118 		struct page *page;
1119 		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
1120 		u16 pending_idx;
1121 		RING_IDX idx;
1122 		int work_to_do;
1123 		unsigned int data_len;
1124 		pending_ring_idx_t index;
1125 
1126 		if (vif->tx.sring->req_prod - vif->tx.req_cons >
1127 		    XEN_NETIF_TX_RING_SIZE) {
1128 			netdev_err(vif->dev,
1129 				   "Impossible number of requests. "
1130 				   "req_prod %d, req_cons %d, size %ld\n",
1131 				   vif->tx.sring->req_prod, vif->tx.req_cons,
1132 				   XEN_NETIF_TX_RING_SIZE);
1133 			xenvif_fatal_tx_err(vif);
1134 			continue;
1135 		}
1136 
1137 		work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx);
1138 		if (!work_to_do)
1139 			break;
1140 
1141 		idx = vif->tx.req_cons;
1142 		rmb(); /* Ensure that we see the request before we copy it. */
1143 		memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));
1144 
1145 		/* Credit-based scheduling. */
1146 		if (txreq.size > vif->remaining_credit &&
1147 		    tx_credit_exceeded(vif, txreq.size))
1148 			break;
1149 
1150 		vif->remaining_credit -= txreq.size;
1151 
1152 		work_to_do--;
1153 		vif->tx.req_cons = ++idx;
1154 
1155 		memset(extras, 0, sizeof(extras));
1156 		if (txreq.flags & XEN_NETTXF_extra_info) {
1157 			work_to_do = xenvif_get_extras(vif, extras,
1158 						       work_to_do);
1159 			idx = vif->tx.req_cons;
1160 			if (unlikely(work_to_do < 0))
1161 				break;
1162 		}
1163 
1164 		ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do);
1165 		if (unlikely(ret < 0))
1166 			break;
1167 
1168 		idx += ret;
1169 
1170 		if (unlikely(txreq.size < ETH_HLEN)) {
1171 			netdev_dbg(vif->dev,
1172 				   "Bad packet size: %d\n", txreq.size);
1173 			xenvif_tx_err(vif, &txreq, idx);
1174 			break;
1175 		}
1176 
		/* The payload must not cross a page as it mustn't fragment. */
1178 		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
1179 			netdev_err(vif->dev,
1180 				   "txreq.offset: %x, size: %u, end: %lu\n",
1181 				   txreq.offset, txreq.size,
1182 				   (txreq.offset&~PAGE_MASK) + txreq.size);
1183 			xenvif_fatal_tx_err(vif);
1184 			break;
1185 		}
1186 
1187 		index = pending_index(vif->pending_cons);
1188 		pending_idx = vif->pending_ring[index];
1189 
1190 		data_len = (txreq.size > PKT_PROT_LEN &&
1191 			    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
1192 			PKT_PROT_LEN : txreq.size;
1193 
1194 		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
1195 				GFP_ATOMIC | __GFP_NOWARN);
1196 		if (unlikely(skb == NULL)) {
1197 			netdev_dbg(vif->dev,
1198 				   "Can't allocate a skb in start_xmit.\n");
1199 			xenvif_tx_err(vif, &txreq, idx);
1200 			break;
1201 		}
1202 
		/* Packets handed to the network stack must have some headroom. */
1204 		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
1205 
1206 		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1207 			struct xen_netif_extra_info *gso;
1208 			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1209 
1210 			if (xenvif_set_skb_gso(vif, skb, gso)) {
1211 				/* Failure in xenvif_set_skb_gso is fatal. */
1212 				kfree_skb(skb);
1213 				break;
1214 			}
1215 		}
1216 
1217 		/* XXX could copy straight to head */
1218 		page = xenvif_alloc_page(vif, pending_idx);
1219 		if (!page) {
1220 			kfree_skb(skb);
1221 			xenvif_tx_err(vif, &txreq, idx);
1222 			break;
1223 		}
1224 
1225 		gop->source.u.ref = txreq.gref;
1226 		gop->source.domid = vif->domid;
1227 		gop->source.offset = txreq.offset;
1228 
1229 		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
1230 		gop->dest.domid = DOMID_SELF;
1231 		gop->dest.offset = txreq.offset;
1232 
1233 		gop->len = txreq.size;
1234 		gop->flags = GNTCOPY_source_gref;
1235 
1236 		gop++;
1237 
1238 		memcpy(&vif->pending_tx_info[pending_idx].req,
1239 		       &txreq, sizeof(txreq));
1240 		vif->pending_tx_info[pending_idx].head = index;
1241 		*((u16 *)skb->data) = pending_idx;
1242 
1243 		__skb_put(skb, data_len);
1244 
1245 		skb_shinfo(skb)->nr_frags = ret;
1246 		if (data_len < txreq.size) {
1247 			skb_shinfo(skb)->nr_frags++;
1248 			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
1249 					     pending_idx);
1250 		} else {
1251 			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
1252 					     INVALID_PENDING_IDX);
1253 		}
1254 
1255 		vif->pending_cons++;
1256 
1257 		request_gop = xenvif_get_requests(vif, skb, txfrags, gop);
1258 		if (request_gop == NULL) {
1259 			kfree_skb(skb);
1260 			xenvif_tx_err(vif, &txreq, idx);
1261 			break;
1262 		}
1263 		gop = request_gop;
1264 
1265 		__skb_queue_tail(&vif->tx_queue, skb);
1266 
1267 		vif->tx.req_cons = idx;
1268 
1269 		if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops))
1270 			break;
1271 	}
1272 
1273 	return gop - vif->tx_copy_ops;
1274 }
1275 
1276 
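/*
 * Second half of the tx path: after the batched grant copies have been
 * performed, copy headers into the linear area, attach fragments, fix
 * up checksum and GSO metadata and hand each skb to the network stack.
 * Returns the number of packets delivered.
 */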
1277 static int xenvif_tx_submit(struct xenvif *vif)
1278 {
1279 	struct gnttab_copy *gop = vif->tx_copy_ops;
1280 	struct sk_buff *skb;
1281 	int work_done = 0;
1282 
1283 	while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
1284 		struct xen_netif_tx_request *txp;
1285 		u16 pending_idx;
1286 		unsigned data_len;
1287 
1288 		pending_idx = *((u16 *)skb->data);
1289 		txp = &vif->pending_tx_info[pending_idx].req;
1290 
1291 		/* Check the remap error code. */
1292 		if (unlikely(xenvif_tx_check_gop(vif, skb, &gop))) {
1293 			netdev_dbg(vif->dev, "netback grant failed.\n");
1294 			skb_shinfo(skb)->nr_frags = 0;
1295 			kfree_skb(skb);
1296 			continue;
1297 		}
1298 
1299 		data_len = skb->len;
1300 		memcpy(skb->data,
1301 		       (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset),
1302 		       data_len);
1303 		if (data_len < txp->size) {
1304 			/* Append the packet payload as a fragment. */
1305 			txp->offset += data_len;
1306 			txp->size -= data_len;
1307 		} else {
1308 			/* Schedule a response immediately. */
1309 			xenvif_idx_release(vif, pending_idx,
1310 					   XEN_NETIF_RSP_OKAY);
1311 		}
1312 
1313 		if (txp->flags & XEN_NETTXF_csum_blank)
1314 			skb->ip_summed = CHECKSUM_PARTIAL;
1315 		else if (txp->flags & XEN_NETTXF_data_validated)
1316 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1317 
1318 		xenvif_fill_frags(vif, skb);
1319 
1320 		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
1321 			int target = min_t(int, skb->len, PKT_PROT_LEN);
1322 			__pskb_pull_tail(skb, target - skb_headlen(skb));
1323 		}
1324 
1325 		skb->dev      = vif->dev;
1326 		skb->protocol = eth_type_trans(skb, skb->dev);
1327 		skb_reset_network_header(skb);
1328 
1329 		if (checksum_setup(vif, skb)) {
1330 			netdev_dbg(vif->dev,
1331 				   "Can't setup checksum in net_tx_action\n");
1332 			kfree_skb(skb);
1333 			continue;
1334 		}
1335 
1336 		skb_probe_transport_header(skb, 0);
1337 
1338 		/* If the packet is GSO then we will have just set up the
1339 		 * transport header offset in checksum_setup so it's now
1340 		 * straightforward to calculate gso_segs.
1341 		 */
1342 		if (skb_is_gso(skb)) {
1343 			int mss = skb_shinfo(skb)->gso_size;
1344 			int hdrlen = skb_transport_header(skb) -
1345 				skb_mac_header(skb) +
1346 				tcp_hdrlen(skb);
1347 
1348 			skb_shinfo(skb)->gso_segs =
1349 				DIV_ROUND_UP(skb->len - hdrlen, mss);
1350 		}
1351 
1352 		vif->dev->stats.rx_bytes += skb->len;
1353 		vif->dev->stats.rx_packets++;
1354 
1355 		work_done++;
1356 
1357 		netif_receive_skb(skb);
1358 	}
1359 
1360 	return work_done;
1361 }
1362 
1363 /* Called after netfront has transmitted */
1364 int xenvif_tx_action(struct xenvif *vif, int budget)
1365 {
1366 	unsigned nr_gops;
1367 	int work_done;
1368 
1369 	if (unlikely(!tx_work_todo(vif)))
1370 		return 0;
1371 
1372 	nr_gops = xenvif_tx_build_gops(vif, budget);
1373 
1374 	if (nr_gops == 0)
1375 		return 0;
1376 
1377 	gnttab_batch_copy(vif->tx_copy_ops, nr_gops);
1378 
1379 	work_done = xenvif_tx_submit(vif);
1380 
1381 	return work_done;
1382 }
1383 
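/*
 * Release the chain of merged tx requests headed by 'pending_idx':
 * send a tx response for each merged request, return the slots to the
 * pending ring and drop the reference on the backing page.
 */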
1384 static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
1385 			       u8 status)
1386 {
1387 	struct pending_tx_info *pending_tx_info;
1388 	pending_ring_idx_t head;
1389 	u16 peek; /* peek into next tx request */
1390 
1391 	BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL));
1392 
1393 	/* Already complete? */
1394 	if (vif->mmap_pages[pending_idx] == NULL)
1395 		return;
1396 
1397 	pending_tx_info = &vif->pending_tx_info[pending_idx];
1398 
1399 	head = pending_tx_info->head;
1400 
1401 	BUG_ON(!pending_tx_is_head(vif, head));
1402 	BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx);
1403 
1404 	do {
1405 		pending_ring_idx_t index;
1406 		pending_ring_idx_t idx = pending_index(head);
1407 		u16 info_idx = vif->pending_ring[idx];
1408 
1409 		pending_tx_info = &vif->pending_tx_info[info_idx];
1410 		make_tx_response(vif, &pending_tx_info->req, status);
1411 
1412 		/* Setting any number other than
1413 		 * INVALID_PENDING_RING_IDX indicates this slot is
1414 		 * starting a new packet / ending a previous packet.
1415 		 */
1416 		pending_tx_info->head = 0;
1417 
1418 		index = pending_index(vif->pending_prod++);
1419 		vif->pending_ring[index] = vif->pending_ring[info_idx];
1420 
1421 		peek = vif->pending_ring[pending_index(++head)];
1422 
1423 	} while (!pending_tx_is_head(vif, peek));
1424 
1425 	put_page(vif->mmap_pages[pending_idx]);
1426 	vif->mmap_pages[pending_idx] = NULL;
1427 }
1428 
1429 
1430 static void make_tx_response(struct xenvif *vif,
1431 			     struct xen_netif_tx_request *txp,
1432 			     s8       st)
1433 {
1434 	RING_IDX i = vif->tx.rsp_prod_pvt;
1435 	struct xen_netif_tx_response *resp;
1436 	int notify;
1437 
1438 	resp = RING_GET_RESPONSE(&vif->tx, i);
1439 	resp->id     = txp->id;
1440 	resp->status = st;
1441 
1442 	if (txp->flags & XEN_NETTXF_extra_info)
1443 		RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
1444 
1445 	vif->tx.rsp_prod_pvt = ++i;
1446 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
1447 	if (notify)
1448 		notify_remote_via_irq(vif->tx_irq);
1449 }
1450 
1451 static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
1452 					     u16      id,
1453 					     s8       st,
1454 					     u16      offset,
1455 					     u16      size,
1456 					     u16      flags)
1457 {
1458 	RING_IDX i = vif->rx.rsp_prod_pvt;
1459 	struct xen_netif_rx_response *resp;
1460 
1461 	resp = RING_GET_RESPONSE(&vif->rx, i);
1462 	resp->offset     = offset;
1463 	resp->flags      = flags;
1464 	resp->id         = id;
1465 	resp->status     = (s16)size;
1466 	if (st < 0)
1467 		resp->status = (s16)st;
1468 
1469 	vif->rx.rsp_prod_pvt = ++i;
1470 
1471 	return resp;
1472 }
1473 
1474 static inline int rx_work_todo(struct xenvif *vif)
1475 {
1476 	return (!skb_queue_empty(&vif->rx_queue) && !vif->rx_queue_stopped) ||
1477 		vif->rx_event;
1478 }
1479 
1480 static inline int tx_work_todo(struct xenvif *vif)
1481 {
1482 
1483 	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) &&
1484 	    (nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
1485 	     < MAX_PENDING_REQS))
1486 		return 1;
1487 
1488 	return 0;
1489 }
1490 
1491 void xenvif_unmap_frontend_rings(struct xenvif *vif)
1492 {
1493 	if (vif->tx.sring)
1494 		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
1495 					vif->tx.sring);
1496 	if (vif->rx.sring)
1497 		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
1498 					vif->rx.sring);
1499 }
1500 
1501 int xenvif_map_frontend_rings(struct xenvif *vif,
1502 			      grant_ref_t tx_ring_ref,
1503 			      grant_ref_t rx_ring_ref)
1504 {
1505 	void *addr;
1506 	struct xen_netif_tx_sring *txs;
1507 	struct xen_netif_rx_sring *rxs;
1508 
1509 	int err = -ENOMEM;
1510 
1511 	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
1512 				     tx_ring_ref, &addr);
1513 	if (err)
1514 		goto err;
1515 
1516 	txs = (struct xen_netif_tx_sring *)addr;
1517 	BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
1518 
1519 	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
1520 				     rx_ring_ref, &addr);
1521 	if (err)
1522 		goto err;
1523 
1524 	rxs = (struct xen_netif_rx_sring *)addr;
1525 	BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);
1526 
1527 	return 0;
1528 
1529 err:
1530 	xenvif_unmap_frontend_rings(vif);
1531 	return err;
1532 }
1533 
1534 void xenvif_stop_queue(struct xenvif *vif)
1535 {
1536 	if (!vif->can_queue)
1537 		return;
1538 
1539 	netif_stop_queue(vif->dev);
1540 }
1541 
1542 static void xenvif_start_queue(struct xenvif *vif)
1543 {
1544 	if (xenvif_schedulable(vif))
1545 		netif_wake_queue(vif->dev);
1546 }
1547 
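/*
 * Per-vif kernel thread: sleeps until there is RX work to do, pushes
 * queued skbs to the frontend and wakes the net device queue once the
 * backlog has drained.
 */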
1548 int xenvif_kthread(void *data)
1549 {
1550 	struct xenvif *vif = data;
1551 	struct sk_buff *skb;
1552 
1553 	while (!kthread_should_stop()) {
1554 		wait_event_interruptible(vif->wq,
1555 					 rx_work_todo(vif) ||
1556 					 kthread_should_stop());
1557 		if (kthread_should_stop())
1558 			break;
1559 
1560 		if (!skb_queue_empty(&vif->rx_queue))
1561 			xenvif_rx_action(vif);
1562 
1563 		vif->rx_event = false;
1564 
1565 		if (skb_queue_empty(&vif->rx_queue) &&
1566 		    netif_queue_stopped(vif->dev))
1567 			xenvif_start_queue(vif);
1568 
1569 		cond_resched();
1570 	}
1571 
1572 	/* Bin any remaining skbs */
1573 	while ((skb = skb_dequeue(&vif->rx_queue)) != NULL)
1574 		dev_kfree_skb(skb);
1575 
1576 	return 0;
1577 }
1578 
1579 static int __init netback_init(void)
1580 {
1581 	int rc = 0;
1582 
1583 	if (!xen_domain())
1584 		return -ENODEV;
1585 
1586 	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
1587 		pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
1588 			fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
1589 		fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
1590 	}
1591 
1592 	rc = xenvif_xenbus_init();
1593 	if (rc)
1594 		goto failed_init;
1595 
1596 	return 0;
1597 
1598 failed_init:
1599 	return rc;
1600 }
1601 
1602 module_init(netback_init);
1603 
1604 static void __exit netback_fini(void)
1605 {
1606 	xenvif_xenbus_fini();
1607 }
1608 module_exit(netback_fini);
1609 
1610 MODULE_LICENSE("Dual BSD/GPL");
1611 MODULE_ALIAS("xen-backend:vif");
1612