/*
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/net/xen-netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "common.h"

#include <linux/kthread.h>
#include <linux/if_vlan.h>
#include <linux/udp.h>

#include <net/tcp.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/memory.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>

/*
 * This is the maximum number of slots a skb can have. If a guest sends a
 * skb which exceeds this limit it is considered malicious.
 */
#define FATAL_SKB_SLOTS_DEFAULT 20
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);

/*
 * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating
 * the maximum number of slots a valid packet can use. Currently this
 * value is defined to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to
 * be supported by all backends.
 */
#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN

typedef unsigned int pending_ring_idx_t;
#define INVALID_PENDING_RING_IDX (~0U)

struct pending_tx_info {
	struct xen_netif_tx_request req; /* coalesced tx request */
	struct xenvif *vif;
	pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX
				  * if it is head of one or more tx
				  * reqs
				  */
};

struct netbk_rx_meta {
	int id;
	int size;
	int gso_size;
};

#define MAX_PENDING_REQS 256
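#if 0
/*
 * Illustrative sketch, not built as part of the driver: how the 'head'
 * field above is used.  A packet coalesced from three frontend slots
 * occupies three pending_tx_info entries; only the first one carries a
 * valid pending ring index in 'head', while the continuation slots are
 * marked with INVALID_PENDING_RING_IDX (see xen_netbk_get_requests()
 * below).  The index values are made up for the example.
 */
static void example_pending_chain(struct pending_tx_info *info)
{
	info[0].head = 0;			  /* head of the packet */
	info[1].head = INVALID_PENDING_RING_IDX; /* continuation slot */
	info[2].head = INVALID_PENDING_RING_IDX; /* continuation slot */
}
#endif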
/* Discriminate from any valid pending_idx value. */
#define INVALID_PENDING_IDX 0xFFFF

#define MAX_BUFFER_OFFSET PAGE_SIZE

/* extra field used in struct page */
union page_ext {
	struct {
#if BITS_PER_LONG < 64
#define IDX_WIDTH   8
#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
		unsigned int group:GROUP_WIDTH;
		unsigned int idx:IDX_WIDTH;
#else
		unsigned int group, idx;
#endif
	} e;
	void *mapping;
};

struct xen_netbk {
	wait_queue_head_t wq;
	struct task_struct *task;

	struct sk_buff_head rx_queue;
	struct sk_buff_head tx_queue;

	struct timer_list net_timer;

	struct page *mmap_pages[MAX_PENDING_REQS];

	pending_ring_idx_t pending_prod;
	pending_ring_idx_t pending_cons;
	struct list_head net_schedule_list;

	/* Protect the net_schedule_list in netif. */
	spinlock_t net_schedule_list_lock;

	atomic_t netfront_count;

	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
	/* Coalescing tx requests before copying makes the number of grant
	 * copy ops greater than or equal to the number of slots required.
	 * In the worst case a tx request consumes 2 gnttab_copy operations.
	 */
	struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];

	u16 pending_ring[MAX_PENDING_REQS];

	/*
	 * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
	 * head/fragment page uses 2 copy operations because it
	 * straddles two buffers in the frontend.
	 */
	struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
	struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
};

static struct xen_netbk *xen_netbk;
static int xen_netbk_group_nr;
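#if 0
/*
 * Illustrative sketch, not built as part of the driver: why the copy-op
 * arrays in struct xen_netbk are sized at twice the number of slots.
 * A data chunk whose destination window starts part-way into a buffer
 * can straddle into the next buffer, so in the worst case it needs two
 * grant copies; hence the factor of two above.  The numbers below are
 * made up and assume a 4 kB MAX_BUFFER_OFFSET.
 */
static void example_worst_case_split(void)
{
	unsigned long already_used = 100;	/* bytes already in the buffer */
	unsigned long chunk = PAGE_SIZE;	/* data to transfer */
	unsigned long first = MAX_BUFFER_OFFSET - already_used;
	unsigned long second = chunk - first;

	BUG_ON(first + second != chunk);	/* covered by two copy ops */
}
#endif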
/*
 * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of
 * one or more merged tx requests, otherwise it is the continuation of
 * previous tx request.
 */
static inline int pending_tx_is_head(struct xen_netbk *netbk, RING_IDX idx)
{
	return netbk->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX;
}

void xen_netbk_add_xenvif(struct xenvif *vif)
{
	int i;
	int min_netfront_count;
	int min_group = 0;
	struct xen_netbk *netbk;

	min_netfront_count = atomic_read(&xen_netbk[0].netfront_count);
	for (i = 0; i < xen_netbk_group_nr; i++) {
		int netfront_count = atomic_read(&xen_netbk[i].netfront_count);
		if (netfront_count < min_netfront_count) {
			min_group = i;
			min_netfront_count = netfront_count;
		}
	}

	netbk = &xen_netbk[min_group];

	vif->netbk = netbk;
	atomic_inc(&netbk->netfront_count);
}

void xen_netbk_remove_xenvif(struct xenvif *vif)
{
	struct xen_netbk *netbk = vif->netbk;
	vif->netbk = NULL;
	atomic_dec(&netbk->netfront_count);
}

static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
				  u8 status);
static void make_tx_response(struct xenvif *vif,
			     struct xen_netif_tx_request *txp,
			     s8 st);
static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
						      u16 id,
						      s8 st,
						      u16 offset,
						      u16 size,
						      u16 flags);

static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
				       u16 idx)
{
	return page_to_pfn(netbk->mmap_pages[idx]);
}

static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
					 u16 idx)
{
	return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
}

/* extra field used in struct page */
static inline void set_page_ext(struct page *pg, struct xen_netbk *netbk,
				unsigned int idx)
{
	unsigned int group = netbk - xen_netbk;
	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };

	BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
	pg->mapping = ext.mapping;
}

static int get_page_ext(struct page *pg,
			unsigned int *pgroup, unsigned int *pidx)
{
	union page_ext ext = { .mapping = pg->mapping };
	struct xen_netbk *netbk;
	unsigned int group, idx;

	group = ext.e.group - 1;

	if (group < 0 || group >= xen_netbk_group_nr)
		return 0;

	netbk = &xen_netbk[group];

	idx = ext.e.idx;

	if ((idx < 0) || (idx >= MAX_PENDING_REQS))
		return 0;

	if (netbk->mmap_pages[idx] != pg)
		return 0;

	*pgroup = group;
	*pidx = idx;

	return 1;
}
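#if 0
/*
 * Illustrative sketch, not built as part of the driver: the page_ext
 * round trip.  The group is stored biased by one so that a page whose
 * ->mapping was never set by set_page_ext() decodes to group 0 - 1,
 * which fails the range check in get_page_ext() and is treated as a
 * local (non-foreign) page.  The slot number 7 is made up.
 */
static void example_page_ext_round_trip(struct xen_netbk *netbk,
					struct page *pg)
{
	unsigned int group, idx;

	set_page_ext(pg, netbk, 7);	/* tag page as pending slot 7 */
	netbk->mmap_pages[7] = pg;	/* get_page_ext() checks this too */

	BUG_ON(!get_page_ext(pg, &group, &idx));
	BUG_ON(group != netbk - xen_netbk);
	BUG_ON(idx != 7);
}
#endif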
/*
 * This is the amount of each packet that we copy rather than map, so that
 * the guest can't fiddle with the contents of the headers while we do
 * packet processing on them (netfilter, routing, etc).
 */
#define PKT_PROT_LEN    (ETH_HLEN + \
			 VLAN_HLEN + \
			 sizeof(struct iphdr) + MAX_IPOPTLEN + \
			 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)

static u16 frag_get_pending_idx(skb_frag_t *frag)
{
	return (u16)frag->page_offset;
}

static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
{
	frag->page_offset = pending_idx;
}

static inline pending_ring_idx_t pending_index(unsigned i)
{
	return i & (MAX_PENDING_REQS-1);
}

static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
{
	return MAX_PENDING_REQS -
		netbk->pending_prod + netbk->pending_cons;
}

static void xen_netbk_kick_thread(struct xen_netbk *netbk)
{
	wake_up(&netbk->wq);
}

static int max_required_rx_slots(struct xenvif *vif)
{
	int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);

	/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
	if (vif->can_sg || vif->gso || vif->gso_prefix)
		max += MAX_SKB_FRAGS + 1; /* extra_info + frags */

	return max;
}

int xen_netbk_rx_ring_full(struct xenvif *vif)
{
	RING_IDX peek   = vif->rx_req_cons_peek;
	RING_IDX needed = max_required_rx_slots(vif);

	return ((vif->rx.sring->req_prod - peek) < needed) ||
	       ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
}

int xen_netbk_must_stop_queue(struct xenvif *vif)
{
	if (!xen_netbk_rx_ring_full(vif))
		return 0;

	vif->rx.sring->req_event = vif->rx_req_cons_peek +
		max_required_rx_slots(vif);
	mb(); /* request notification /then/ check the queue */

	return xen_netbk_rx_ring_full(vif);
}

/*
 * Returns true if we should start a new receive buffer instead of
 * adding 'size' bytes to a buffer which currently contains 'offset'
 * bytes.
 */
static bool start_new_rx_buffer(int offset, unsigned long size, int head)
{
	/* simple case: we have completely filled the current buffer. */
	if (offset == MAX_BUFFER_OFFSET)
		return true;

	/*
	 * complex case: start a fresh buffer if the current frag
	 * would overflow the current buffer but only if:
	 *     (i)   this frag would fit completely in the next buffer
	 * and (ii)  there is already some data in the current buffer
	 * and (iii) this is not the head buffer.
	 *
	 * Where:
	 * - (i) stops us splitting a frag into two copies
	 *   unless the frag is too large for a single buffer.
	 * - (ii) stops us from leaving a buffer pointlessly empty.
	 * - (iii) stops us leaving the first buffer
	 *   empty. Strictly speaking this is already covered
	 *   by (ii) but it is explicitly checked because
	 *   netfront relies on the first buffer being
	 *   non-empty and can crash otherwise.
	 *
	 * This means we will effectively linearise small
	 * frags but do not needlessly split large buffers
	 * into multiple copies, and tend to give large frags
	 * their own buffers as before.
	 */
	if ((offset + size > MAX_BUFFER_OFFSET) &&
	    (size <= MAX_BUFFER_OFFSET) && offset && !head)
		return true;

	return false;
}
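#if 0
/*
 * Illustrative sketch, not built as part of the driver: a few concrete
 * decisions made by start_new_rx_buffer() with a 4 kB MAX_BUFFER_OFFSET.
 * The offsets and sizes are made up for the example.
 */
static void example_start_new_rx_buffer(void)
{
	/* Current buffer completely full: always move on. */
	BUG_ON(!start_new_rx_buffer(MAX_BUFFER_OFFSET, 100, 0));

	/* A 1000-byte frag overflows a buffer already holding 3500 bytes
	 * and would fit whole in a fresh one: start a new buffer.
	 */
	BUG_ON(!start_new_rx_buffer(3500, 1000, 0));

	/* Same frag but in the head buffer: keep filling it, netfront
	 * relies on the first buffer being non-empty.
	 */
	BUG_ON(start_new_rx_buffer(3500, 1000, 1));

	/* Frag bigger than any single buffer: it has to be split anyway,
	 * so do not start a new buffer for it.
	 */
	BUG_ON(start_new_rx_buffer(3500, 8192, 0));
}
#endif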
/*
 * Figure out how many ring slots we're going to need to send @skb to
 * the guest. This function is essentially a dry run of
 * netbk_gop_frag_copy.
 */
unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
{
	unsigned int count;
	int i, copy_off;

	count = DIV_ROUND_UP(skb_headlen(skb), PAGE_SIZE);

	copy_off = skb_headlen(skb) % PAGE_SIZE;

	if (skb_shinfo(skb)->gso_size)
		count++;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
		unsigned long offset = skb_shinfo(skb)->frags[i].page_offset;
		unsigned long bytes;

		offset &= ~PAGE_MASK;

		while (size > 0) {
			BUG_ON(offset >= PAGE_SIZE);
			BUG_ON(copy_off > MAX_BUFFER_OFFSET);

			bytes = PAGE_SIZE - offset;

			if (bytes > size)
				bytes = size;

			if (start_new_rx_buffer(copy_off, bytes, 0)) {
				count++;
				copy_off = 0;
			}

			if (copy_off + bytes > MAX_BUFFER_OFFSET)
				bytes = MAX_BUFFER_OFFSET - copy_off;

			copy_off += bytes;

			offset += bytes;
			size -= bytes;

			if (offset == PAGE_SIZE)
				offset = 0;
		}
	}
	return count;
}

struct netrx_pending_operations {
	unsigned copy_prod, copy_cons;
	unsigned meta_prod, meta_cons;
	struct gnttab_copy *copy;
	struct netbk_rx_meta *meta;
	int copy_off;
	grant_ref_t copy_gref;
};

static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif,
						struct netrx_pending_operations *npo)
{
	struct netbk_rx_meta *meta;
	struct xen_netif_rx_request *req;

	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);

	meta = npo->meta + npo->meta_prod++;
	meta->gso_size = 0;
	meta->size = 0;
	meta->id = req->id;

	npo->copy_off = 0;
	npo->copy_gref = req->gref;

	return meta;
}

/*
 * Set up the grant operations for this fragment. If it's a flipping
 * interface, we also set up the unmap request from here.
 */
static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
				struct netrx_pending_operations *npo,
				struct page *page, unsigned long size,
				unsigned long offset, int *head)
{
	struct gnttab_copy *copy_gop;
	struct netbk_rx_meta *meta;
	/*
	 * These variables are used iff get_page_ext returns true,
	 * in which case they are guaranteed to be initialized.
	 */
	unsigned int uninitialized_var(group), uninitialized_var(idx);
	int foreign = get_page_ext(page, &group, &idx);
	unsigned long bytes;

	/* Data must not cross a page boundary. */
	BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));

	meta = npo->meta + npo->meta_prod - 1;

	/* Skip unused frames from start of page */
	page += offset >> PAGE_SHIFT;
	offset &= ~PAGE_MASK;

	while (size > 0) {
		BUG_ON(offset >= PAGE_SIZE);
		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);

		bytes = PAGE_SIZE - offset;

		if (bytes > size)
			bytes = size;

		if (start_new_rx_buffer(npo->copy_off, bytes, *head)) {
			/*
			 * Netfront requires there to be some data in the head
			 * buffer.
			 */
			BUG_ON(*head);

			meta = get_next_rx_buffer(vif, npo);
		}

		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
			bytes = MAX_BUFFER_OFFSET - npo->copy_off;

		copy_gop = npo->copy + npo->copy_prod++;
		copy_gop->flags = GNTCOPY_dest_gref;
		if (foreign) {
			struct xen_netbk *netbk = &xen_netbk[group];
			struct pending_tx_info *src_pend;

			src_pend = &netbk->pending_tx_info[idx];

			copy_gop->source.domid = src_pend->vif->domid;
			copy_gop->source.u.ref = src_pend->req.gref;
			copy_gop->flags |= GNTCOPY_source_gref;
		} else {
			void *vaddr = page_address(page);
			copy_gop->source.domid = DOMID_SELF;
			copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
		}
		copy_gop->source.offset = offset;
		copy_gop->dest.domid = vif->domid;

		copy_gop->dest.offset = npo->copy_off;
		copy_gop->dest.u.ref = npo->copy_gref;
		copy_gop->len = bytes;

		npo->copy_off += bytes;
		meta->size += bytes;

		offset += bytes;
		size -= bytes;

		/* Next frame */
		if (offset == PAGE_SIZE && size) {
			BUG_ON(!PageCompound(page));
			page++;
			offset = 0;
		}

		/* Leave a gap for the GSO descriptor. */
		if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
			vif->rx.req_cons++;

		*head = 0; /* There must be something in this buffer now. */

	}
}
/*
 * Prepare an SKB to be transmitted to the frontend.
 *
 * This function is responsible for allocating grant operations, meta
 * structures, etc.
 *
 * It returns the number of meta structures consumed. The number of
 * ring slots used is always equal to the number of meta slots used
 * plus the number of GSO descriptors used. Currently, we use either
 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
 * frontend-side LRO).
 */
static int netbk_gop_skb(struct sk_buff *skb,
			 struct netrx_pending_operations *npo)
{
	struct xenvif *vif = netdev_priv(skb->dev);
	int nr_frags = skb_shinfo(skb)->nr_frags;
	int i;
	struct xen_netif_rx_request *req;
	struct netbk_rx_meta *meta;
	unsigned char *data;
	int head = 1;
	int old_meta_prod;

	old_meta_prod = npo->meta_prod;

	/* Set up a GSO prefix descriptor, if necessary */
	if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
		req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
		meta = npo->meta + npo->meta_prod++;
		meta->gso_size = skb_shinfo(skb)->gso_size;
		meta->size = 0;
		meta->id = req->id;
	}

	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
	meta = npo->meta + npo->meta_prod++;

	if (!vif->gso_prefix)
		meta->gso_size = skb_shinfo(skb)->gso_size;
	else
		meta->gso_size = 0;

	meta->size = 0;
	meta->id = req->id;
	npo->copy_off = 0;
	npo->copy_gref = req->gref;

	data = skb->data;
	while (data < skb_tail_pointer(skb)) {
		unsigned int offset = offset_in_page(data);
		unsigned int len = PAGE_SIZE - offset;

		if (data + len > skb_tail_pointer(skb))
			len = skb_tail_pointer(skb) - data;

		netbk_gop_frag_copy(vif, skb, npo,
				    virt_to_page(data), len, offset, &head);
		data += len;
	}

	for (i = 0; i < nr_frags; i++) {
		netbk_gop_frag_copy(vif, skb, npo,
				    skb_frag_page(&skb_shinfo(skb)->frags[i]),
				    skb_frag_size(&skb_shinfo(skb)->frags[i]),
				    skb_shinfo(skb)->frags[i].page_offset,
				    &head);
	}

	return npo->meta_prod - old_meta_prod;
}
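#if 0
/*
 * Illustrative sketch, not built as part of the driver: the slot
 * accounting described in the comment above netbk_gop_skb().  For a GSO
 * skb handled without the gso_prefix scheme, every meta slot costs one
 * rx request, plus one extra request that netbk_gop_frag_copy() leaves
 * as a gap for the XEN_NETIF_EXTRA_TYPE_GSO descriptor filled in later
 * by xen_netbk_rx_action().  The figures are made up for the example.
 */
static void example_slot_accounting(void)
{
	int meta_slots_used = 3;	/* say, the payload spans 3 buffers */
	int gso_descriptors = 1;	/* one reserved ring slot, no meta */
	int ring_slots_used = meta_slots_used + gso_descriptors;

	BUG_ON(ring_slots_used != 4);
}
#endif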
/*
 * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
 * used to set up the operations on top of
 * netrx_pending_operations, which have since been done. Check that
 * they didn't give any errors and advance over them.
 */
static int netbk_check_gop(struct xenvif *vif, int nr_meta_slots,
			   struct netrx_pending_operations *npo)
{
	struct gnttab_copy *copy_op;
	int status = XEN_NETIF_RSP_OKAY;
	int i;

	for (i = 0; i < nr_meta_slots; i++) {
		copy_op = npo->copy + npo->copy_cons++;
		if (copy_op->status != GNTST_okay) {
			netdev_dbg(vif->dev,
				   "Bad status %d from copy to DOM%d.\n",
				   copy_op->status, vif->domid);
			status = XEN_NETIF_RSP_ERROR;
		}
	}

	return status;
}

static void netbk_add_frag_responses(struct xenvif *vif, int status,
				     struct netbk_rx_meta *meta,
				     int nr_meta_slots)
{
	int i;
	unsigned long offset;

	/* No fragments used */
	if (nr_meta_slots <= 1)
		return;

	nr_meta_slots--;

	for (i = 0; i < nr_meta_slots; i++) {
		int flags;
		if (i == nr_meta_slots - 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		offset = 0;
		make_rx_response(vif, meta[i].id, status, offset,
				 meta[i].size, flags);
	}
}

struct skb_cb_overlay {
	int meta_slots_used;
};

static void xen_netbk_rx_action(struct xen_netbk *netbk)
{
	struct xenvif *vif = NULL, *tmp;
	s8 status;
	u16 flags;
	struct xen_netif_rx_response *resp;
	struct sk_buff_head rxq;
	struct sk_buff *skb;
	LIST_HEAD(notify);
	int ret;
	int nr_frags;
	int count;
	unsigned long offset;
	struct skb_cb_overlay *sco;

	struct netrx_pending_operations npo = {
		.copy  = netbk->grant_copy_op,
		.meta  = netbk->meta,
	};

	skb_queue_head_init(&rxq);

	count = 0;

	while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
		vif = netdev_priv(skb->dev);
		nr_frags = skb_shinfo(skb)->nr_frags;

		sco = (struct skb_cb_overlay *)skb->cb;
		sco->meta_slots_used = netbk_gop_skb(skb, &npo);

		count += nr_frags + 1;

		__skb_queue_tail(&rxq, skb);

		/* Filled the batch queue? */
		/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
		if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
			break;
	}

	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));

	if (!npo.copy_prod)
		return;

	BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
	gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod);

	while ((skb = __skb_dequeue(&rxq)) != NULL) {
		sco = (struct skb_cb_overlay *)skb->cb;

		vif = netdev_priv(skb->dev);

		if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
			resp = RING_GET_RESPONSE(&vif->rx,
						 vif->rx.rsp_prod_pvt++);

			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;

			resp->offset = netbk->meta[npo.meta_cons].gso_size;
			resp->id = netbk->meta[npo.meta_cons].id;
			resp->status = sco->meta_slots_used;

			npo.meta_cons++;
			sco->meta_slots_used--;
		}

		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;

		status = netbk_check_gop(vif, sco->meta_slots_used, &npo);

		if (sco->meta_slots_used == 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
			flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
			/* remote but checksummed. */
			flags |= XEN_NETRXF_data_validated;

		offset = 0;
		resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,
					status, offset,
					netbk->meta[npo.meta_cons].size,
					flags);

		if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
			struct xen_netif_extra_info *gso =
				(struct xen_netif_extra_info *)
				RING_GET_RESPONSE(&vif->rx,
						  vif->rx.rsp_prod_pvt++);

			resp->flags |= XEN_NETRXF_extra_info;

			gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
			gso->u.gso.pad = 0;
			gso->u.gso.features = 0;

			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
			gso->flags = 0;
		}

		netbk_add_frag_responses(vif, status,
					 netbk->meta + npo.meta_cons + 1,
					 sco->meta_slots_used);

		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);

		xenvif_notify_tx_completion(vif);

		if (ret && list_empty(&vif->notify_list))
			list_add_tail(&vif->notify_list, &notify);
		else
			xenvif_put(vif);
		npo.meta_cons += sco->meta_slots_used;
		dev_kfree_skb(skb);
	}

	list_for_each_entry_safe(vif, tmp, &notify, notify_list) {
		notify_remote_via_irq(vif->irq);
		list_del_init(&vif->notify_list);
		xenvif_put(vif);
	}

	/* More work to do? */
	if (!skb_queue_empty(&netbk->rx_queue) &&
	    !timer_pending(&netbk->net_timer))
		xen_netbk_kick_thread(netbk);
}

void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
{
	struct xen_netbk *netbk = vif->netbk;

	skb_queue_tail(&netbk->rx_queue, skb);

	xen_netbk_kick_thread(netbk);
}

static void xen_netbk_alarm(unsigned long data)
{
	struct xen_netbk *netbk = (struct xen_netbk *)data;
	xen_netbk_kick_thread(netbk);
}

static int __on_net_schedule_list(struct xenvif *vif)
{
	return !list_empty(&vif->schedule_list);
}

/* Must be called with net_schedule_list_lock held */
static void remove_from_net_schedule_list(struct xenvif *vif)
{
	if (likely(__on_net_schedule_list(vif))) {
		list_del_init(&vif->schedule_list);
		xenvif_put(vif);
	}
}

static struct xenvif *poll_net_schedule_list(struct xen_netbk *netbk)
{
	struct xenvif *vif = NULL;

	spin_lock_irq(&netbk->net_schedule_list_lock);
	if (list_empty(&netbk->net_schedule_list))
		goto out;

	vif = list_first_entry(&netbk->net_schedule_list,
			       struct xenvif, schedule_list);
	if (!vif)
		goto out;

	xenvif_get(vif);

	remove_from_net_schedule_list(vif);
out:
	spin_unlock_irq(&netbk->net_schedule_list_lock);
	return vif;
}

void xen_netbk_schedule_xenvif(struct xenvif *vif)
{
	unsigned long flags;
	struct xen_netbk *netbk = vif->netbk;

	if (__on_net_schedule_list(vif))
		goto kick;

	spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
	if (!__on_net_schedule_list(vif) &&
	    likely(xenvif_schedulable(vif))) {
		list_add_tail(&vif->schedule_list, &netbk->net_schedule_list);
		xenvif_get(vif);
	}
	spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);

kick:
	smp_mb();
	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
	    !list_empty(&netbk->net_schedule_list))
		xen_netbk_kick_thread(netbk);
}
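#if 0
/*
 * Illustrative sketch, not built as part of the driver: the kick
 * heuristic above.  nr_pending_reqs() counts the slots currently in
 * flight; with pending_prod == 256 and pending_cons == 200 that is
 * 256 - 256 + 200 = 200, which is not below MAX_PENDING_REQS/2, so the
 * worker thread is not woken yet.  The counter values are made up.
 */
static void example_kick_threshold(void)
{
	pending_ring_idx_t prod = 256, cons = 200;
	pending_ring_idx_t in_flight = MAX_PENDING_REQS - prod + cons;

	BUG_ON(in_flight != 200);
	BUG_ON(in_flight < MAX_PENDING_REQS/2);	/* not below the threshold */
}
#endif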
void xen_netbk_deschedule_xenvif(struct xenvif *vif)
{
	struct xen_netbk *netbk = vif->netbk;
	spin_lock_irq(&netbk->net_schedule_list_lock);
	remove_from_net_schedule_list(vif);
	spin_unlock_irq(&netbk->net_schedule_list_lock);
}

void xen_netbk_check_rx_xenvif(struct xenvif *vif)
{
	int more_to_do;

	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);

	if (more_to_do)
		xen_netbk_schedule_xenvif(vif);
}

static void tx_add_credit(struct xenvif *vif)
{
	unsigned long max_burst, max_credit;

	/*
	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
	 * Otherwise the interface can seize up due to insufficient credit.
	 */
	max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
	max_burst = min(max_burst, 131072UL);
	max_burst = max(max_burst, vif->credit_bytes);

	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
	max_credit = vif->remaining_credit + vif->credit_bytes;
	if (max_credit < vif->remaining_credit)
		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */

	vif->remaining_credit = min(max_credit, max_burst);
}

static void tx_credit_callback(unsigned long data)
{
	struct xenvif *vif = (struct xenvif *)data;
	tx_add_credit(vif);
	xen_netbk_check_rx_xenvif(vif);
}

static void netbk_tx_err(struct xenvif *vif,
			 struct xen_netif_tx_request *txp, RING_IDX end)
{
	RING_IDX cons = vif->tx.req_cons;

	do {
		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
		if (cons == end)
			break;
		txp = RING_GET_REQUEST(&vif->tx, cons++);
	} while (1);
	vif->tx.req_cons = cons;
	xen_netbk_check_rx_xenvif(vif);
	xenvif_put(vif);
}

static void netbk_fatal_tx_err(struct xenvif *vif)
{
	netdev_err(vif->dev, "fatal error; disabling device\n");
	xenvif_carrier_off(vif);
	xenvif_put(vif);
}

static int netbk_count_requests(struct xenvif *vif,
				struct xen_netif_tx_request *first,
				struct xen_netif_tx_request *txp,
				int work_to_do)
{
	RING_IDX cons = vif->tx.req_cons;
	int slots = 0;
	int drop_err = 0;
	int more_data;

	if (!(first->flags & XEN_NETTXF_more_data))
		return 0;

	do {
		struct xen_netif_tx_request dropped_tx = { 0 };

		if (slots >= work_to_do) {
			netdev_err(vif->dev,
				   "Asked for %d slots but exceeds this limit\n",
				   work_to_do);
			netbk_fatal_tx_err(vif);
			return -ENODATA;
		}

		/* This guest is really using too many slots and
		 * is considered malicious.
		 */
		if (unlikely(slots >= fatal_skb_slots)) {
			netdev_err(vif->dev,
				   "Malicious frontend using %d slots, threshold %u\n",
				   slots, fatal_skb_slots);
			netbk_fatal_tx_err(vif);
			return -E2BIG;
		}

		/* The Xen network protocol had an implicit dependency on
		 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
		 * the historical MAX_SKB_FRAGS value 18 to honor the
		 * same behavior as before. Any packet using more than
		 * 18 slots but less than fatal_skb_slots slots is
		 * dropped.
		 */
		if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
			if (net_ratelimit())
				netdev_dbg(vif->dev,
					   "Too many slots (%d) exceeding limit (%d), dropping packet\n",
					   slots, XEN_NETBK_LEGACY_SLOTS_MAX);
			drop_err = -E2BIG;
		}

		if (drop_err)
			txp = &dropped_tx;

		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
		       sizeof(*txp));

		/* If the guest submitted a frame >= 64 KiB then
		 * first->size overflowed and following slots will
		 * appear to be larger than the frame.
		 *
		 * This cannot be a fatal error as there are buggy
		 * frontends that do this.
		 *
		 * Consume all slots and drop the packet.
		 */
		if (!drop_err && txp->size > first->size) {
			if (net_ratelimit())
				netdev_dbg(vif->dev,
					   "Invalid tx request, slot size %u > remaining size %u\n",
					   txp->size, first->size);
			drop_err = -EIO;
		}

		first->size -= txp->size;
		slots++;

		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
			netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
				   txp->offset, txp->size);
			netbk_fatal_tx_err(vif);
			return -EINVAL;
		}

		more_data = txp->flags & XEN_NETTXF_more_data;

		if (!drop_err)
			txp++;

	} while (more_data);

	if (drop_err) {
		netbk_tx_err(vif, first, cons + slots);
		return drop_err;
	}

	return slots;
}

static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
					 u16 pending_idx)
{
	struct page *page;
	page = alloc_page(GFP_KERNEL|__GFP_COLD);
	if (!page)
		return NULL;
	set_page_ext(page, netbk, pending_idx);
	netbk->mmap_pages[pending_idx] = page;
	return page;
}

static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
						  struct xenvif *vif,
						  struct sk_buff *skb,
						  struct xen_netif_tx_request *txp,
						  struct gnttab_copy *gop)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	skb_frag_t *frags = shinfo->frags;
	u16 pending_idx = *((u16 *)skb->data);
	u16 head_idx = 0;
	int slot, start;
	struct page *page;
	pending_ring_idx_t index, start_idx = 0;
	uint16_t dst_offset;
	unsigned int nr_slots;
	struct pending_tx_info *first = NULL;

	/* At this point shinfo->nr_frags is in fact the number of
	 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
	 */
	nr_slots = shinfo->nr_frags;

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

	/* Coalesce tx requests; at this point the packet passed in
	 * should be <= 64K. Any packets larger than 64K have been
	 * handled in netbk_count_requests().
	 */
	for (shinfo->nr_frags = slot = start; slot < nr_slots;
	     shinfo->nr_frags++) {
		struct pending_tx_info *pending_tx_info =
			netbk->pending_tx_info;

		page = alloc_page(GFP_KERNEL|__GFP_COLD);
		if (!page)
			goto err;

		dst_offset = 0;
		first = NULL;
		while (dst_offset < PAGE_SIZE && slot < nr_slots) {
			gop->flags = GNTCOPY_source_gref;

			gop->source.u.ref = txp->gref;
			gop->source.domid = vif->domid;
			gop->source.offset = txp->offset;

			gop->dest.domid = DOMID_SELF;

			gop->dest.offset = dst_offset;
			gop->dest.u.gmfn = virt_to_mfn(page_address(page));

			if (dst_offset + txp->size > PAGE_SIZE) {
				/* This page can only merge a portion
				 * of a tx request. Do not increment any
				 * pointer / counter here. The txp
				 * will be dealt with in future
				 * rounds, eventually hitting the
				 * `else` branch.
				 */
				gop->len = PAGE_SIZE - dst_offset;
				txp->offset += gop->len;
				txp->size -= gop->len;
				dst_offset += gop->len; /* quit loop */
			} else {
				/* This tx request can be merged in the page */
				gop->len = txp->size;
				dst_offset += gop->len;

				index = pending_index(netbk->pending_cons++);

				pending_idx = netbk->pending_ring[index];

				memcpy(&pending_tx_info[pending_idx].req, txp,
				       sizeof(*txp));
				xenvif_get(vif);

				pending_tx_info[pending_idx].vif = vif;

				/* Poison these fields, corresponding
				 * fields for head tx req will be set
				 * to correct values after the loop.
				 */
				netbk->mmap_pages[pending_idx] = (void *)(~0UL);
				pending_tx_info[pending_idx].head =
					INVALID_PENDING_RING_IDX;

				if (!first) {
					first = &pending_tx_info[pending_idx];
					start_idx = index;
					head_idx = pending_idx;
				}

				txp++;
				slot++;
			}

			gop++;
		}

		first->req.offset = 0;
		first->req.size = dst_offset;
		first->head = start_idx;
		set_page_ext(page, netbk, head_idx);
		netbk->mmap_pages[head_idx] = page;
		frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
	}

	BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);

	return gop;
err:
	/* Unwind, freeing all pages and sending error responses. */
	while (shinfo->nr_frags-- > start) {
		xen_netbk_idx_release(netbk,
				frag_get_pending_idx(&frags[shinfo->nr_frags]),
				XEN_NETIF_RSP_ERROR);
	}
	/* The head too, if necessary. */
	if (start)
		xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR);

	return NULL;
}

static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
				  struct sk_buff *skb,
				  struct gnttab_copy **gopp)
{
	struct gnttab_copy *gop = *gopp;
	u16 pending_idx = *((u16 *)skb->data);
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	struct pending_tx_info *tx_info;
	int nr_frags = shinfo->nr_frags;
	int i, err, start;
	u16 peek; /* peek into next tx request */

	/* Check status of header. */
	err = gop->status;
	if (unlikely(err))
		xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR);

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

	for (i = start; i < nr_frags; i++) {
		int j, newerr;
		pending_ring_idx_t head;

		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
		tx_info = &netbk->pending_tx_info[pending_idx];
		head = tx_info->head;

		/* Check error status: if okay then remember grant handle. */
		do {
			newerr = (++gop)->status;
			if (newerr)
				break;
			peek = netbk->pending_ring[pending_index(++head)];
		} while (!pending_tx_is_head(netbk, peek));

		if (likely(!newerr)) {
			/* Had a previous error? Invalidate this fragment. */
			if (unlikely(err))
				xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
			continue;
		}

		/* Error on this fragment: respond to client with an error. */
		xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR);

		/* Not the first error? Preceding frags already invalidated. */
		if (err)
			continue;

		/* First error: invalidate header and preceding fragments. */
		pending_idx = *((u16 *)skb->data);
		xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
		for (j = start; j < i; j++) {
			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
			xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
		}

		/* Remember the error: invalidate all subsequent fragments. */
		err = newerr;
	}

	*gopp = gop + 1;
	return err;
}

static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i;

	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = shinfo->frags + i;
		struct xen_netif_tx_request *txp;
		struct page *page;
		u16 pending_idx;

		pending_idx = frag_get_pending_idx(frag);

		txp = &netbk->pending_tx_info[pending_idx].req;
		page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
		__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
		skb->len += txp->size;
		skb->data_len += txp->size;
		skb->truesize += txp->size;

		/* Take an extra reference to offset xen_netbk_idx_release */
		get_page(netbk->mmap_pages[pending_idx]);
		xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
	}
}

static int xen_netbk_get_extras(struct xenvif *vif,
				struct xen_netif_extra_info *extras,
				int work_to_do)
{
	struct xen_netif_extra_info extra;
	RING_IDX cons = vif->tx.req_cons;

	do {
		if (unlikely(work_to_do-- <= 0)) {
			netdev_err(vif->dev, "Missing extra info\n");
			netbk_fatal_tx_err(vif);
			return -EBADR;
		}

		memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
		       sizeof(extra));
		if (unlikely(!extra.type ||
			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			vif->tx.req_cons = ++cons;
			netdev_err(vif->dev,
				   "Invalid extra type: %d\n", extra.type);
			netbk_fatal_tx_err(vif);
			return -EINVAL;
		}

		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
		vif->tx.req_cons = ++cons;
	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return work_to_do;
}

static int netbk_set_skb_gso(struct xenvif *vif,
			     struct sk_buff *skb,
			     struct xen_netif_extra_info *gso)
{
	if (!gso->u.gso.size) {
		netdev_err(vif->dev, "GSO size must not be zero.\n");
		netbk_fatal_tx_err(vif);
		return -EINVAL;
	}

	/* Currently only TCPv4 S.O. is supported. */
	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
		netbk_fatal_tx_err(vif);
		return -EINVAL;
	}

	skb_shinfo(skb)->gso_size = gso->u.gso.size;
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	/* Header must be checked, and gso_segs computed. */
	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
	skb_shinfo(skb)->gso_segs = 0;

	return 0;
}
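#if 0
/*
 * Illustrative sketch, not built as part of the driver: the extra info
 * slot a frontend sends for a TSO frame and what netbk_set_skb_gso()
 * derives from it.  The MSS value of 1448 is made up for the example.
 */
static void example_gso_extra(struct xenvif *vif, struct sk_buff *skb)
{
	struct xen_netif_extra_info gso = {
		.type = XEN_NETIF_EXTRA_TYPE_GSO,
		.u.gso.size = 1448,			/* MSS */
		.u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4,
	};

	BUG_ON(netbk_set_skb_gso(vif, skb, &gso));
	BUG_ON(skb_shinfo(skb)->gso_size != 1448);
	BUG_ON(!(skb_shinfo(skb)->gso_type & SKB_GSO_DODGY));
}
#endif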
static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
{
	struct iphdr *iph;
	int err = -EPROTO;
	int recalculate_partial_csum = 0;

	/*
	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
	 * peers can fail to set NETRXF_csum_blank when sending a GSO
	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
	 * recalculate the partial checksum.
	 */
	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
		vif->rx_gso_checksum_fixup++;
		skb->ip_summed = CHECKSUM_PARTIAL;
		recalculate_partial_csum = 1;
	}

	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	if (skb->protocol != htons(ETH_P_IP))
		goto out;

	iph = (void *)skb->data;
	switch (iph->protocol) {
	case IPPROTO_TCP:
		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
					  offsetof(struct tcphdr, check)))
			goto out;

		if (recalculate_partial_csum) {
			struct tcphdr *tcph = tcp_hdr(skb);
			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
							 skb->len - iph->ihl*4,
							 IPPROTO_TCP, 0);
		}
		break;
	case IPPROTO_UDP:
		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
					  offsetof(struct udphdr, check)))
			goto out;

		if (recalculate_partial_csum) {
			struct udphdr *udph = udp_hdr(skb);
			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
							 skb->len - iph->ihl*4,
							 IPPROTO_UDP, 0);
		}
		break;
	default:
		if (net_ratelimit())
			netdev_err(vif->dev,
				   "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
				   iph->protocol);
		goto out;
	}

	err = 0;

out:
	return err;
}

static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
{
	unsigned long now = jiffies;
	unsigned long next_credit =
		vif->credit_timeout.expires +
		msecs_to_jiffies(vif->credit_usec / 1000);

	/* Timer could already be pending in rare cases. */
	if (timer_pending(&vif->credit_timeout))
		return true;

	/* Passed the point where we can replenish credit? */
	if (time_after_eq(now, next_credit)) {
		vif->credit_timeout.expires = now;
		tx_add_credit(vif);
	}

	/* Still too big to send right now? Set a callback. */
	if (size > vif->remaining_credit) {
		vif->credit_timeout.data = (unsigned long)vif;
		vif->credit_timeout.function = tx_credit_callback;
		mod_timer(&vif->credit_timeout, next_credit);

		return true;
	}

	return false;
}

static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
{
	struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
	struct sk_buff *skb;
	int ret;

	while ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX
		< MAX_PENDING_REQS) &&
		!list_empty(&netbk->net_schedule_list)) {
		struct xenvif *vif;
		struct xen_netif_tx_request txreq;
		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
		struct page *page;
		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
		u16 pending_idx;
		RING_IDX idx;
		int work_to_do;
		unsigned int data_len;
		pending_ring_idx_t index;

		/* Get a netif from the list with work to do. */
		vif = poll_net_schedule_list(netbk);
		/* This can sometimes happen because the test of
		 * list_empty(net_schedule_list) at the top of the
		 * loop is unlocked. Just go back and have another
		 * look.
		 */
		if (!vif)
			continue;

		if (vif->tx.sring->req_prod - vif->tx.req_cons >
		    XEN_NETIF_TX_RING_SIZE) {
			netdev_err(vif->dev,
				   "Impossible number of requests. "
				   "req_prod %d, req_cons %d, size %ld\n",
				   vif->tx.sring->req_prod, vif->tx.req_cons,
				   XEN_NETIF_TX_RING_SIZE);
			netbk_fatal_tx_err(vif);
			continue;
		}

		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
		if (!work_to_do) {
			xenvif_put(vif);
			continue;
		}

		idx = vif->tx.req_cons;
		rmb(); /* Ensure that we see the request before we copy it. */
		memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));

		/* Credit-based scheduling. */
		if (txreq.size > vif->remaining_credit &&
		    tx_credit_exceeded(vif, txreq.size)) {
			xenvif_put(vif);
			continue;
		}

		vif->remaining_credit -= txreq.size;

		work_to_do--;
		vif->tx.req_cons = ++idx;

		memset(extras, 0, sizeof(extras));
		if (txreq.flags & XEN_NETTXF_extra_info) {
			work_to_do = xen_netbk_get_extras(vif, extras,
							  work_to_do);
			idx = vif->tx.req_cons;
			if (unlikely(work_to_do < 0))
				continue;
		}

		ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
		if (unlikely(ret < 0))
			continue;

		idx += ret;

		if (unlikely(txreq.size < ETH_HLEN)) {
			netdev_dbg(vif->dev,
				   "Bad packet size: %d\n", txreq.size);
			netbk_tx_err(vif, &txreq, idx);
			continue;
		}

		/* No crossing a page as the payload mustn't fragment. */
		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
			netdev_err(vif->dev,
				   "txreq.offset: %x, size: %u, end: %lu\n",
				   txreq.offset, txreq.size,
				   (txreq.offset&~PAGE_MASK) + txreq.size);
			netbk_fatal_tx_err(vif);
			continue;
		}

		index = pending_index(netbk->pending_cons);
		pending_idx = netbk->pending_ring[index];

		data_len = (txreq.size > PKT_PROT_LEN &&
			    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
			PKT_PROT_LEN : txreq.size;

		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
				GFP_ATOMIC | __GFP_NOWARN);
		if (unlikely(skb == NULL)) {
			netdev_dbg(vif->dev,
				   "Can't allocate a skb in start_xmit.\n");
			netbk_tx_err(vif, &txreq, idx);
			break;
		}

		/* Packets passed to netif_rx() must have some headroom. */
		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);

		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
			struct xen_netif_extra_info *gso;
			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

			if (netbk_set_skb_gso(vif, skb, gso)) {
				/* Failure in netbk_set_skb_gso is fatal. */
				kfree_skb(skb);
				continue;
			}
		}

		/* XXX could copy straight to head */
		page = xen_netbk_alloc_page(netbk, pending_idx);
		if (!page) {
			kfree_skb(skb);
			netbk_tx_err(vif, &txreq, idx);
			continue;
		}

		gop->source.u.ref = txreq.gref;
		gop->source.domid = vif->domid;
		gop->source.offset = txreq.offset;

		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
		gop->dest.domid = DOMID_SELF;
		gop->dest.offset = txreq.offset;

		gop->len = txreq.size;
		gop->flags = GNTCOPY_source_gref;

		gop++;

		memcpy(&netbk->pending_tx_info[pending_idx].req,
		       &txreq, sizeof(txreq));
		netbk->pending_tx_info[pending_idx].vif = vif;
		netbk->pending_tx_info[pending_idx].head = index;
		*((u16 *)skb->data) = pending_idx;

		__skb_put(skb, data_len);

		skb_shinfo(skb)->nr_frags = ret;
		if (data_len < txreq.size) {
			skb_shinfo(skb)->nr_frags++;
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     pending_idx);
		} else {
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     INVALID_PENDING_IDX);
		}

		netbk->pending_cons++;

		request_gop = xen_netbk_get_requests(netbk, vif,
						     skb, txfrags, gop);
		if (request_gop == NULL) {
			kfree_skb(skb);
			netbk_tx_err(vif, &txreq, idx);
			continue;
		}
		gop = request_gop;

		__skb_queue_tail(&netbk->tx_queue, skb);

		vif->tx.req_cons = idx;
		xen_netbk_check_rx_xenvif(vif);

		if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
			break;
	}

	return gop - netbk->tx_copy_ops;
}

static void xen_netbk_tx_submit(struct xen_netbk *netbk)
{
	struct gnttab_copy *gop = netbk->tx_copy_ops;
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
		struct xen_netif_tx_request *txp;
		struct xenvif *vif;
		u16 pending_idx;
		unsigned data_len;

		pending_idx = *((u16 *)skb->data);
		vif = netbk->pending_tx_info[pending_idx].vif;
		txp = &netbk->pending_tx_info[pending_idx].req;

		/* Check the remap error code. */
		if (unlikely(xen_netbk_tx_check_gop(netbk, skb, &gop))) {
			netdev_dbg(vif->dev, "netback grant failed.\n");
			skb_shinfo(skb)->nr_frags = 0;
			kfree_skb(skb);
			continue;
		}

		data_len = skb->len;
		memcpy(skb->data,
		       (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
		       data_len);
		if (data_len < txp->size) {
			/* Append the packet payload as a fragment. */
			txp->offset += data_len;
			txp->size -= data_len;
		} else {
			/* Schedule a response immediately. */
			xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
		}

		if (txp->flags & XEN_NETTXF_csum_blank)
			skb->ip_summed = CHECKSUM_PARTIAL;
		else if (txp->flags & XEN_NETTXF_data_validated)
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		xen_netbk_fill_frags(netbk, skb);

		/*
		 * If the initial fragment was < PKT_PROT_LEN then
		 * pull through some bytes from the other fragments to
		 * increase the linear region to PKT_PROT_LEN bytes.
		 */
		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
			int target = min_t(int, skb->len, PKT_PROT_LEN);
			__pskb_pull_tail(skb, target - skb_headlen(skb));
		}

		skb->dev      = vif->dev;
		skb->protocol = eth_type_trans(skb, skb->dev);
		skb_reset_network_header(skb);

		if (checksum_setup(vif, skb)) {
			netdev_dbg(vif->dev,
				   "Can't setup checksum in net_tx_action\n");
			kfree_skb(skb);
			continue;
		}

		skb_probe_transport_header(skb, 0);

		vif->dev->stats.rx_bytes += skb->len;
		vif->dev->stats.rx_packets++;

		xenvif_receive_skb(vif, skb);
	}
}

/* Called after netfront has transmitted */
static void xen_netbk_tx_action(struct xen_netbk *netbk)
{
	unsigned nr_gops;

	nr_gops = xen_netbk_tx_build_gops(netbk);

	if (nr_gops == 0)
		return;

	gnttab_batch_copy(netbk->tx_copy_ops, nr_gops);

	xen_netbk_tx_submit(netbk);
}

static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
				  u8 status)
{
	struct xenvif *vif;
	struct pending_tx_info *pending_tx_info;
	pending_ring_idx_t head;
	u16 peek; /* peek into next tx request */

	BUG_ON(netbk->mmap_pages[pending_idx] == (void *)(~0UL));

	/* Already complete? */
	if (netbk->mmap_pages[pending_idx] == NULL)
		return;

	pending_tx_info = &netbk->pending_tx_info[pending_idx];

	vif = pending_tx_info->vif;
	head = pending_tx_info->head;

	BUG_ON(!pending_tx_is_head(netbk, head));
	BUG_ON(netbk->pending_ring[pending_index(head)] != pending_idx);

	do {
		pending_ring_idx_t index;
		pending_ring_idx_t idx = pending_index(head);
		u16 info_idx = netbk->pending_ring[idx];

		pending_tx_info = &netbk->pending_tx_info[info_idx];
		make_tx_response(vif, &pending_tx_info->req, status);

		/* Setting any number other than
		 * INVALID_PENDING_RING_IDX indicates this slot is
		 * starting a new packet / ending a previous packet.
		 */
		pending_tx_info->head = 0;

		index = pending_index(netbk->pending_prod++);
		netbk->pending_ring[index] = netbk->pending_ring[info_idx];

		xenvif_put(vif);

		peek = netbk->pending_ring[pending_index(++head)];

	} while (!pending_tx_is_head(netbk, peek));

	netbk->mmap_pages[pending_idx]->mapping = 0;
	put_page(netbk->mmap_pages[pending_idx]);
	netbk->mmap_pages[pending_idx] = NULL;
}

static void make_tx_response(struct xenvif *vif,
			     struct xen_netif_tx_request *txp,
			     s8 st)
{
	RING_IDX i = vif->tx.rsp_prod_pvt;
	struct xen_netif_tx_response *resp;
	int notify;

	resp = RING_GET_RESPONSE(&vif->tx, i);
	resp->id     = txp->id;
	resp->status = st;

	if (txp->flags & XEN_NETTXF_extra_info)
		RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;

	vif->tx.rsp_prod_pvt = ++i;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
	if (notify)
		notify_remote_via_irq(vif->irq);
}

static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
						      u16 id,
						      s8 st,
						      u16 offset,
						      u16 size,
						      u16 flags)
{
	RING_IDX i = vif->rx.rsp_prod_pvt;
	struct xen_netif_rx_response *resp;

	resp = RING_GET_RESPONSE(&vif->rx, i);
	resp->offset = offset;
	resp->flags  = flags;
	resp->id     = id;
	resp->status = (s16)size;
	if (st < 0)
		resp->status = (s16)st;

	vif->rx.rsp_prod_pvt = ++i;

	return resp;
}

static inline int rx_work_todo(struct xen_netbk *netbk)
{
	return !skb_queue_empty(&netbk->rx_queue);
}

static inline int tx_work_todo(struct xen_netbk *netbk)
{
	if ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX
	     < MAX_PENDING_REQS) &&
	     !list_empty(&netbk->net_schedule_list))
		return 1;

	return 0;
}

static int xen_netbk_kthread(void *data)
{
	struct xen_netbk *netbk = data;
	while (!kthread_should_stop()) {
		wait_event_interruptible(netbk->wq,
				rx_work_todo(netbk) ||
				tx_work_todo(netbk) ||
				kthread_should_stop());
		cond_resched();

		if (kthread_should_stop())
			break;

		if (rx_work_todo(netbk))
			xen_netbk_rx_action(netbk);

		if (tx_work_todo(netbk))
			xen_netbk_tx_action(netbk);
	}

	return 0;
}

void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
{
	if (vif->tx.sring)
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
					vif->tx.sring);
	if (vif->rx.sring)
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
					vif->rx.sring);
}

int xen_netbk_map_frontend_rings(struct xenvif *vif,
				 grant_ref_t tx_ring_ref,
				 grant_ref_t rx_ring_ref)
{
	void *addr;
	struct xen_netif_tx_sring *txs;
	struct xen_netif_rx_sring *rxs;

	int err = -ENOMEM;

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
				     tx_ring_ref, &addr);
	if (err)
		goto err;

	txs = (struct xen_netif_tx_sring *)addr;
	BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
				     rx_ring_ref, &addr);
	if (err)
		goto err;

	rxs = (struct xen_netif_rx_sring *)addr;
	BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);

	vif->rx_req_cons_peek = 0;

	return 0;

err:
	xen_netbk_unmap_frontend_rings(vif);
	return err;
}

static int __init netback_init(void)
{
	int i;
	int rc = 0;
	int group;

	if (!xen_domain())
		return -ENODEV;

	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
		printk(KERN_INFO
		       "xen-netback: fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
		       fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
		fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
	}

	xen_netbk_group_nr = num_online_cpus();
	xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
	if (!xen_netbk)
		return -ENOMEM;

	for (group = 0; group < xen_netbk_group_nr; group++) {
		struct xen_netbk *netbk = &xen_netbk[group];
		skb_queue_head_init(&netbk->rx_queue);
		skb_queue_head_init(&netbk->tx_queue);

		init_timer(&netbk->net_timer);
		netbk->net_timer.data = (unsigned long)netbk;
		netbk->net_timer.function = xen_netbk_alarm;

		netbk->pending_cons = 0;
		netbk->pending_prod = MAX_PENDING_REQS;
		for (i = 0; i < MAX_PENDING_REQS; i++)
			netbk->pending_ring[i] = i;

		init_waitqueue_head(&netbk->wq);
		netbk->task = kthread_create(xen_netbk_kthread,
					     (void *)netbk,
					     "netback/%u", group);

		if (IS_ERR(netbk->task)) {
			printk(KERN_ALERT "kthread_create() failed at netback\n");
			del_timer(&netbk->net_timer);
			rc = PTR_ERR(netbk->task);
			goto failed_init;
		}

		kthread_bind(netbk->task, group);

		INIT_LIST_HEAD(&netbk->net_schedule_list);

		spin_lock_init(&netbk->net_schedule_list_lock);

		atomic_set(&netbk->netfront_count, 0);

		wake_up_process(netbk->task);
	}

	rc = xenvif_xenbus_init();
	if (rc)
		goto failed_init;

	return 0;

failed_init:
	while (--group >= 0) {
		struct xen_netbk *netbk = &xen_netbk[group];
		for (i = 0; i < MAX_PENDING_REQS; i++) {
			if (netbk->mmap_pages[i])
				__free_page(netbk->mmap_pages[i]);
		}
		del_timer(&netbk->net_timer);
		kthread_stop(netbk->task);
	}
	vfree(xen_netbk);
	return rc;
}

module_init(netback_init);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vif");