/*
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/net/xen-netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "common.h"

#include <linux/kthread.h>
#include <linux/if_vlan.h>
#include <linux/udp.h>

#include <net/tcp.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/memory.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>

/* Provide an option to disable split event channels at load time as
 * event channels are a limited resource. Split event channels are
 * enabled by default.
 */
bool separate_tx_rx_irq = true;
module_param(separate_tx_rx_irq, bool, 0644);

/*
 * This is the maximum number of slots a skb can have. If a guest sends a
 * skb which exceeds this limit it is considered malicious.
 */
#define FATAL_SKB_SLOTS_DEFAULT 20
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);

/*
 * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating
 * the maximum number of slots a valid packet can use. Currently this
 * value is defined to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to
 * be supported by all backends.
 */
#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN

/*
 * If head != INVALID_PENDING_RING_IDX, it means this tx request is the
 * head of one or more merged tx requests, otherwise it is the
 * continuation of a previous tx request.
 */
static inline int pending_tx_is_head(struct xenvif *vif, RING_IDX idx)
{
	return vif->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX;
}

static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
			       u8 status);

static void make_tx_response(struct xenvif *vif,
			     struct xen_netif_tx_request *txp,
			     s8 st);

static inline int tx_work_todo(struct xenvif *vif);
static inline int rx_work_todo(struct xenvif *vif);

static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
						      u16 id,
						      s8 st,
						      u16 offset,
						      u16 size,
						      u16 flags);

static inline unsigned long idx_to_pfn(struct xenvif *vif,
				       u16 idx)
{
	return page_to_pfn(vif->mmap_pages[idx]);
}

static inline unsigned long idx_to_kaddr(struct xenvif *vif,
					 u16 idx)
{
	return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
}

/*
 * This is the amount of each packet that we copy rather than map, so
 * that the guest can't fiddle with the contents of the headers while we
 * do packet processing on them (netfilter, routing, etc).
 */
#define PKT_PROT_LEN    (ETH_HLEN + \
			 VLAN_HLEN + \
			 sizeof(struct iphdr) + MAX_IPOPTLEN + \
			 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)

static u16 frag_get_pending_idx(skb_frag_t *frag)
{
	return (u16)frag->page_offset;
}

static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
{
	frag->page_offset = pending_idx;
}

static inline pending_ring_idx_t pending_index(unsigned i)
{
	return i & (MAX_PENDING_REQS-1);
}

static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif)
{
	return MAX_PENDING_REQS -
		vif->pending_prod + vif->pending_cons;
}

static int max_required_rx_slots(struct xenvif *vif)
{
	int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);

	/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
	if (vif->can_sg || vif->gso || vif->gso_prefix)
		max += MAX_SKB_FRAGS + 1; /* extra_info + frags */

	return max;
}

int xenvif_rx_ring_full(struct xenvif *vif)
{
	RING_IDX peek   = vif->rx_req_cons_peek;
	RING_IDX needed = max_required_rx_slots(vif);

	return ((vif->rx.sring->req_prod - peek) < needed) ||
	       ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
}

int xenvif_must_stop_queue(struct xenvif *vif)
{
	if (!xenvif_rx_ring_full(vif))
		return 0;

	vif->rx.sring->req_event = vif->rx_req_cons_peek +
		max_required_rx_slots(vif);
	mb(); /* request notification /then/ check the queue */

	return xenvif_rx_ring_full(vif);
}

/*
 * Returns true if we should start a new receive buffer instead of
 * adding 'size' bytes to a buffer which currently contains 'offset'
 * bytes.
 */
static bool start_new_rx_buffer(int offset, unsigned long size, int head)
{
	/* simple case: we have completely filled the current buffer. */
	if (offset == MAX_BUFFER_OFFSET)
		return true;

	/*
	 * complex case: start a fresh buffer if the current frag
	 * would overflow the current buffer but only if:
	 *     (i)   this frag would fit completely in the next buffer
	 * and (ii)  there is already some data in the current buffer
	 * and (iii) this is not the head buffer.
	 *
	 * Where:
	 * - (i) stops us splitting a frag into two copies
	 *   unless the frag is too large for a single buffer.
	 * - (ii)  stops us from leaving a buffer pointlessly empty.
	 * - (iii) stops us leaving the first buffer
	 *   empty. Strictly speaking this is already covered
	 *   by (ii) but is explicitly checked because
	 *   netfront relies on the first buffer being
	 *   non-empty and can crash otherwise.
	 *
	 * This means we will effectively linearise small
	 * frags but do not needlessly split large buffers
	 * into multiple copies; large frags tend to get
	 * their own buffers as before.
	 */
	if ((offset + size > MAX_BUFFER_OFFSET) &&
	    (size <= MAX_BUFFER_OFFSET) && offset && !head)
		return true;

	return false;
}

/*
 * Figure out how many ring slots we're going to need to send @skb to
 * the guest. This function is essentially a dry run of
 * xenvif_gop_frag_copy.
 */
unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
{
	unsigned int count;
	int i, copy_off;

	count = DIV_ROUND_UP(skb_headlen(skb), PAGE_SIZE);

	copy_off = skb_headlen(skb) % PAGE_SIZE;

	if (skb_shinfo(skb)->gso_size)
		count++;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
		unsigned long offset = skb_shinfo(skb)->frags[i].page_offset;
		unsigned long bytes;

		offset &= ~PAGE_MASK;

		while (size > 0) {
			BUG_ON(offset >= PAGE_SIZE);
			BUG_ON(copy_off > MAX_BUFFER_OFFSET);

			bytes = PAGE_SIZE - offset;

			if (bytes > size)
				bytes = size;

			if (start_new_rx_buffer(copy_off, bytes, 0)) {
				count++;
				copy_off = 0;
			}

			if (copy_off + bytes > MAX_BUFFER_OFFSET)
				bytes = MAX_BUFFER_OFFSET - copy_off;

			copy_off += bytes;

			offset += bytes;
			size -= bytes;

			if (offset == PAGE_SIZE)
				offset = 0;
		}
	}
	return count;
}

struct netrx_pending_operations {
	unsigned copy_prod, copy_cons;
	unsigned meta_prod, meta_cons;
	struct gnttab_copy *copy;
	struct xenvif_rx_meta *meta;
	int copy_off;
	grant_ref_t copy_gref;
};

static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
						 struct netrx_pending_operations *npo)
{
	struct xenvif_rx_meta *meta;
	struct xen_netif_rx_request *req;

	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);

	meta = npo->meta + npo->meta_prod++;
	meta->gso_size = 0;
	meta->size = 0;
	meta->id = req->id;

	npo->copy_off = 0;
	npo->copy_gref = req->gref;

	return meta;
}

/*
 * Set up the grant operations for this fragment. If it's a flipping
 * interface, we also set up the unmap request from here.
 */
static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
				 struct netrx_pending_operations *npo,
				 struct page *page, unsigned long size,
				 unsigned long offset, int *head)
{
	struct gnttab_copy *copy_gop;
	struct xenvif_rx_meta *meta;
	unsigned long bytes;

	/* Data must not cross a page boundary. */
	BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));

	meta = npo->meta + npo->meta_prod - 1;

	/* Skip unused frames from start of page */
	page += offset >> PAGE_SHIFT;
	offset &= ~PAGE_MASK;

	while (size > 0) {
		BUG_ON(offset >= PAGE_SIZE);
		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);

		bytes = PAGE_SIZE - offset;

		if (bytes > size)
			bytes = size;

		if (start_new_rx_buffer(npo->copy_off, bytes, *head)) {
			/*
			 * Netfront requires there to be some data in the head
			 * buffer.
			 */
			BUG_ON(*head);

			meta = get_next_rx_buffer(vif, npo);
		}

		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
			bytes = MAX_BUFFER_OFFSET - npo->copy_off;

		copy_gop = npo->copy + npo->copy_prod++;
		copy_gop->flags = GNTCOPY_dest_gref;
		copy_gop->len = bytes;

		copy_gop->source.domid = DOMID_SELF;
		copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
		copy_gop->source.offset = offset;

		copy_gop->dest.domid = vif->domid;
		copy_gop->dest.offset = npo->copy_off;
		copy_gop->dest.u.ref = npo->copy_gref;

		npo->copy_off += bytes;
		meta->size += bytes;

		offset += bytes;
		size -= bytes;

		/* Next frame */
		if (offset == PAGE_SIZE && size) {
			BUG_ON(!PageCompound(page));
			page++;
			offset = 0;
		}

		/* Leave a gap for the GSO descriptor. */
		if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
			vif->rx.req_cons++;

		*head = 0; /* There must be something in this buffer now. */

	}
}

/*
 * Prepare an SKB to be transmitted to the frontend.
 *
 * This function is responsible for allocating grant operations, meta
 * structures, etc.
 *
 * It returns the number of meta structures consumed. The number of
 * ring slots used is always equal to the number of meta slots used
 * plus the number of GSO descriptors used. Currently, we use either
 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
 * frontend-side LRO).
 */
static int xenvif_gop_skb(struct sk_buff *skb,
			  struct netrx_pending_operations *npo)
{
	struct xenvif *vif = netdev_priv(skb->dev);
	int nr_frags = skb_shinfo(skb)->nr_frags;
	int i;
	struct xen_netif_rx_request *req;
	struct xenvif_rx_meta *meta;
	unsigned char *data;
	int head = 1;
	int old_meta_prod;

	old_meta_prod = npo->meta_prod;

	/* Set up a GSO prefix descriptor, if necessary */
	if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
		req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
		meta = npo->meta + npo->meta_prod++;
		meta->gso_size = skb_shinfo(skb)->gso_size;
		meta->size = 0;
		meta->id = req->id;
	}

	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
	meta = npo->meta + npo->meta_prod++;

	if (!vif->gso_prefix)
		meta->gso_size = skb_shinfo(skb)->gso_size;
	else
		meta->gso_size = 0;

	meta->size = 0;
	meta->id = req->id;
	npo->copy_off = 0;
	npo->copy_gref = req->gref;

	data = skb->data;
	while (data < skb_tail_pointer(skb)) {
		unsigned int offset = offset_in_page(data);
		unsigned int len = PAGE_SIZE - offset;

		if (data + len > skb_tail_pointer(skb))
			len = skb_tail_pointer(skb) - data;

		xenvif_gop_frag_copy(vif, skb, npo,
				     virt_to_page(data), len, offset, &head);
		data += len;
	}

	for (i = 0; i < nr_frags; i++) {
		xenvif_gop_frag_copy(vif, skb, npo,
				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
				     skb_shinfo(skb)->frags[i].page_offset,
				     &head);
	}

	return npo->meta_prod - old_meta_prod;
}

/*
 * This is a twin to xenvif_gop_skb. Assume that xenvif_gop_skb was
 * used to set up the operations on the top of
 * netrx_pending_operations, which have since been done. Check that
 * they didn't give any errors and advance over them.
 */
static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
			    struct netrx_pending_operations *npo)
{
	struct gnttab_copy *copy_op;
	int status = XEN_NETIF_RSP_OKAY;
	int i;

	for (i = 0; i < nr_meta_slots; i++) {
		copy_op = npo->copy + npo->copy_cons++;
		if (copy_op->status != GNTST_okay) {
			netdev_dbg(vif->dev,
				   "Bad status %d from copy to DOM%d.\n",
				   copy_op->status, vif->domid);
			status = XEN_NETIF_RSP_ERROR;
		}
	}

	return status;
}

static void xenvif_add_frag_responses(struct xenvif *vif, int status,
				      struct xenvif_rx_meta *meta,
				      int nr_meta_slots)
{
	int i;
	unsigned long offset;

	/* No fragments used */
	if (nr_meta_slots <= 1)
		return;

	nr_meta_slots--;

	for (i = 0; i < nr_meta_slots; i++) {
		int flags;
		if (i == nr_meta_slots - 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		offset = 0;
		make_rx_response(vif, meta[i].id, status, offset,
				 meta[i].size, flags);
	}
}

struct skb_cb_overlay {
	int meta_slots_used;
};

static void xenvif_kick_thread(struct xenvif *vif)
{
	wake_up(&vif->wq);
}

void xenvif_rx_action(struct xenvif *vif)
{
	s8 status;
	u16 flags;
	struct xen_netif_rx_response *resp;
	struct sk_buff_head rxq;
	struct sk_buff *skb;
	LIST_HEAD(notify);
	int ret;
	int nr_frags;
	int count;
	unsigned long offset;
	struct skb_cb_overlay *sco;
	int need_to_notify = 0;

	struct netrx_pending_operations npo = {
		.copy  = vif->grant_copy_op,
		.meta  = vif->meta,
	};

	skb_queue_head_init(&rxq);

	count = 0;

	while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
		vif = netdev_priv(skb->dev);
		nr_frags = skb_shinfo(skb)->nr_frags;

		sco = (struct skb_cb_overlay *)skb->cb;
		sco->meta_slots_used = xenvif_gop_skb(skb, &npo);

		count += nr_frags + 1;

		__skb_queue_tail(&rxq, skb);

		/* Filled the batch queue? */
		/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
		if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
			break;
	}

	BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta));

	if (!npo.copy_prod)
		return;

	BUG_ON(npo.copy_prod > ARRAY_SIZE(vif->grant_copy_op));
	gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);

	while ((skb = __skb_dequeue(&rxq)) != NULL) {
		sco = (struct skb_cb_overlay *)skb->cb;

		vif = netdev_priv(skb->dev);

		if (vif->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
			resp = RING_GET_RESPONSE(&vif->rx,
						 vif->rx.rsp_prod_pvt++);

			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;

			resp->offset = vif->meta[npo.meta_cons].gso_size;
			resp->id = vif->meta[npo.meta_cons].id;
			resp->status = sco->meta_slots_used;

			npo.meta_cons++;
			sco->meta_slots_used--;
		}


		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;

		status = xenvif_check_gop(vif, sco->meta_slots_used, &npo);

		if (sco->meta_slots_used == 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
			flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
			/* remote but checksummed. */
			flags |= XEN_NETRXF_data_validated;

		offset = 0;
		resp = make_rx_response(vif, vif->meta[npo.meta_cons].id,
					status, offset,
					vif->meta[npo.meta_cons].size,
					flags);

		if (vif->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
			struct xen_netif_extra_info *gso =
				(struct xen_netif_extra_info *)
				RING_GET_RESPONSE(&vif->rx,
						  vif->rx.rsp_prod_pvt++);

			resp->flags |= XEN_NETRXF_extra_info;

			gso->u.gso.size = vif->meta[npo.meta_cons].gso_size;
			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
			gso->u.gso.pad = 0;
			gso->u.gso.features = 0;

			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
			gso->flags = 0;
		}

		xenvif_add_frag_responses(vif, status,
					  vif->meta + npo.meta_cons + 1,
					  sco->meta_slots_used);

		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);

		if (ret)
			need_to_notify = 1;

		xenvif_notify_tx_completion(vif);

		npo.meta_cons += sco->meta_slots_used;
		dev_kfree_skb(skb);
	}

	if (need_to_notify)
		notify_remote_via_irq(vif->rx_irq);

	/* More work to do? */
	if (!skb_queue_empty(&vif->rx_queue))
		xenvif_kick_thread(vif);
}

void xenvif_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
{
	skb_queue_tail(&vif->rx_queue, skb);

	xenvif_kick_thread(vif);
}

void xenvif_check_rx_xenvif(struct xenvif *vif)
{
	int more_to_do;

	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);

	if (more_to_do)
		napi_schedule(&vif->napi);
}

static void tx_add_credit(struct xenvif *vif)
{
	unsigned long max_burst, max_credit;

	/*
	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
	 * Otherwise the interface can seize up due to insufficient credit.
	 */
	max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
	max_burst = min(max_burst, 131072UL);
	max_burst = max(max_burst, vif->credit_bytes);

	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
	max_credit = vif->remaining_credit + vif->credit_bytes;
	if (max_credit < vif->remaining_credit)
		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */

	vif->remaining_credit = min(max_credit, max_burst);
}

static void tx_credit_callback(unsigned long data)
{
	struct xenvif *vif = (struct xenvif *)data;
	tx_add_credit(vif);
	xenvif_check_rx_xenvif(vif);
}

static void xenvif_tx_err(struct xenvif *vif,
			  struct xen_netif_tx_request *txp, RING_IDX end)
{
	RING_IDX cons = vif->tx.req_cons;

	do {
		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
		if (cons == end)
			break;
		txp = RING_GET_REQUEST(&vif->tx, cons++);
	} while (1);
	vif->tx.req_cons = cons;
}

static void xenvif_fatal_tx_err(struct xenvif *vif)
{
	netdev_err(vif->dev, "fatal error; disabling device\n");
	xenvif_carrier_off(vif);
}

static int xenvif_count_requests(struct xenvif *vif,
				 struct xen_netif_tx_request *first,
				 struct xen_netif_tx_request *txp,
				 int work_to_do)
{
	RING_IDX cons = vif->tx.req_cons;
	int slots = 0;
	int drop_err = 0;
	int more_data;

	if (!(first->flags & XEN_NETTXF_more_data))
		return 0;

	do {
		struct xen_netif_tx_request dropped_tx = { 0 };

		if (slots >= work_to_do) {
			netdev_err(vif->dev,
				   "Asked for %d slots but exceeds this limit\n",
				   work_to_do);
			xenvif_fatal_tx_err(vif);
			return -ENODATA;
		}

		/* This guest is really using too many slots and
		 * is considered malicious.
		 */
		if (unlikely(slots >= fatal_skb_slots)) {
			netdev_err(vif->dev,
				   "Malicious frontend using %d slots, threshold %u\n",
				   slots, fatal_skb_slots);
			xenvif_fatal_tx_err(vif);
			return -E2BIG;
		}

		/* The Xen network protocol had an implicit dependency on
		 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
		 * the historical MAX_SKB_FRAGS value 18 to honor the
		 * same behavior as before. Any packet using more than
		 * 18 slots but less than fatal_skb_slots slots is
		 * dropped.
		 */
		if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
			if (net_ratelimit())
				netdev_dbg(vif->dev,
					   "Too many slots (%d) exceeding limit (%d), dropping packet\n",
					   slots, XEN_NETBK_LEGACY_SLOTS_MAX);
			drop_err = -E2BIG;
		}

		if (drop_err)
			txp = &dropped_tx;

		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
		       sizeof(*txp));

		/* If the guest submitted a frame >= 64 KiB then
		 * first->size overflowed and following slots will
		 * appear to be larger than the frame.
		 *
		 * This cannot be a fatal error as there are buggy
		 * frontends that do this.
		 *
		 * Consume all slots and drop the packet.
		 */
		if (!drop_err && txp->size > first->size) {
			if (net_ratelimit())
				netdev_dbg(vif->dev,
					   "Invalid tx request, slot size %u > remaining size %u\n",
					   txp->size, first->size);
			drop_err = -EIO;
		}

		first->size -= txp->size;
		slots++;

		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
			netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
				   txp->offset, txp->size);
			xenvif_fatal_tx_err(vif);
			return -EINVAL;
		}

		more_data = txp->flags & XEN_NETTXF_more_data;

		if (!drop_err)
			txp++;

	} while (more_data);

	if (drop_err) {
		xenvif_tx_err(vif, first, cons + slots);
		return drop_err;
	}

	return slots;
}

static struct page *xenvif_alloc_page(struct xenvif *vif,
				      u16 pending_idx)
{
	struct page *page;

	page = alloc_page(GFP_ATOMIC|__GFP_COLD);
	if (!page)
		return NULL;
	vif->mmap_pages[pending_idx] = page;

	return page;
}

static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif,
					       struct sk_buff *skb,
					       struct xen_netif_tx_request *txp,
					       struct gnttab_copy *gop)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	skb_frag_t *frags = shinfo->frags;
	u16 pending_idx = *((u16 *)skb->data);
	u16 head_idx = 0;
	int slot, start;
	struct page *page;
	pending_ring_idx_t index, start_idx = 0;
	uint16_t dst_offset;
	unsigned int nr_slots;
	struct pending_tx_info *first = NULL;

	/* At this point shinfo->nr_frags is in fact the number of
	 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
	 */
	nr_slots = shinfo->nr_frags;

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

	/* Coalesce tx requests, at this point the packet passed in
	 * should be <= 64K. Any packets larger than 64K have been
	 * handled in xenvif_count_requests().
	 */
	for (shinfo->nr_frags = slot = start; slot < nr_slots;
	     shinfo->nr_frags++) {
		struct pending_tx_info *pending_tx_info =
			vif->pending_tx_info;

		page = alloc_page(GFP_ATOMIC|__GFP_COLD);
		if (!page)
			goto err;

		dst_offset = 0;
		first = NULL;
		while (dst_offset < PAGE_SIZE && slot < nr_slots) {
			gop->flags = GNTCOPY_source_gref;

			gop->source.u.ref = txp->gref;
			gop->source.domid = vif->domid;
			gop->source.offset = txp->offset;

			gop->dest.domid = DOMID_SELF;

			gop->dest.offset = dst_offset;
			gop->dest.u.gmfn = virt_to_mfn(page_address(page));

			if (dst_offset + txp->size > PAGE_SIZE) {
				/* This page can only merge a portion
				 * of the tx request. Do not increment
				 * any pointer / counter here. The txp
				 * will be dealt with in future
				 * rounds, eventually hitting the
				 * `else` branch.
				 */
				gop->len = PAGE_SIZE - dst_offset;
				txp->offset += gop->len;
				txp->size -= gop->len;
				dst_offset += gop->len; /* quit loop */
			} else {
				/* This tx request can be merged in the page */
				gop->len = txp->size;
				dst_offset += gop->len;

				index = pending_index(vif->pending_cons++);

				pending_idx = vif->pending_ring[index];

				memcpy(&pending_tx_info[pending_idx].req, txp,
				       sizeof(*txp));

				/* Poison these fields, corresponding
				 * fields for head tx req will be set
				 * to correct values after the loop.
				 */
				vif->mmap_pages[pending_idx] = (void *)(~0UL);
				pending_tx_info[pending_idx].head =
					INVALID_PENDING_RING_IDX;

				if (!first) {
					first = &pending_tx_info[pending_idx];
					start_idx = index;
					head_idx = pending_idx;
				}

				txp++;
				slot++;
			}

			gop++;
		}

		first->req.offset = 0;
		first->req.size = dst_offset;
		first->head = start_idx;
		vif->mmap_pages[head_idx] = page;
		frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
	}

	BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);

	return gop;
err:
	/* Unwind, freeing all pages and sending error responses. */
	while (shinfo->nr_frags-- > start) {
		xenvif_idx_release(vif,
				   frag_get_pending_idx(&frags[shinfo->nr_frags]),
				   XEN_NETIF_RSP_ERROR);
	}
	/* The head too, if necessary. */
	if (start)
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);

	return NULL;
}

static int xenvif_tx_check_gop(struct xenvif *vif,
			       struct sk_buff *skb,
			       struct gnttab_copy **gopp)
{
	struct gnttab_copy *gop = *gopp;
	u16 pending_idx = *((u16 *)skb->data);
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	struct pending_tx_info *tx_info;
	int nr_frags = shinfo->nr_frags;
	int i, err, start;
	u16 peek; /* peek into next tx request */

	/* Check status of header. */
	err = gop->status;
	if (unlikely(err))
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

	for (i = start; i < nr_frags; i++) {
		int j, newerr;
		pending_ring_idx_t head;

		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
		tx_info = &vif->pending_tx_info[pending_idx];
		head = tx_info->head;

		/* Check error status: if okay then remember grant handle. */
		do {
			newerr = (++gop)->status;
			if (newerr)
				break;
			peek = vif->pending_ring[pending_index(++head)];
		} while (!pending_tx_is_head(vif, peek));

		if (likely(!newerr)) {
			/* Had a previous error? Invalidate this fragment. */
			if (unlikely(err))
				xenvif_idx_release(vif, pending_idx,
						   XEN_NETIF_RSP_OKAY);
			continue;
		}

		/* Error on this fragment: respond to client with an error. */
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);

		/* Not the first error? Preceding frags already invalidated. */
		if (err)
			continue;

		/* First error: invalidate header and preceding fragments. */
		pending_idx = *((u16 *)skb->data);
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
		for (j = start; j < i; j++) {
			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
			xenvif_idx_release(vif, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		/* Remember the error: invalidate all subsequent fragments. */
		err = newerr;
	}

	*gopp = gop + 1;
	return err;
}

static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i;

	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = shinfo->frags + i;
		struct xen_netif_tx_request *txp;
		struct page *page;
		u16 pending_idx;

		pending_idx = frag_get_pending_idx(frag);

		txp = &vif->pending_tx_info[pending_idx].req;
		page = virt_to_page(idx_to_kaddr(vif, pending_idx));
		__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
		skb->len += txp->size;
		skb->data_len += txp->size;
		skb->truesize += txp->size;

		/* Take an extra reference to offset xenvif_idx_release */
		get_page(vif->mmap_pages[pending_idx]);
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
	}
}

static int xenvif_get_extras(struct xenvif *vif,
			     struct xen_netif_extra_info *extras,
			     int work_to_do)
{
	struct xen_netif_extra_info extra;
	RING_IDX cons = vif->tx.req_cons;

	do {
		if (unlikely(work_to_do-- <= 0)) {
			netdev_err(vif->dev, "Missing extra info\n");
			xenvif_fatal_tx_err(vif);
			return -EBADR;
		}

		memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
		       sizeof(extra));
		if (unlikely(!extra.type ||
			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			vif->tx.req_cons = ++cons;
			netdev_err(vif->dev,
				   "Invalid extra type: %d\n", extra.type);
			xenvif_fatal_tx_err(vif);
			return -EINVAL;
		}

		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
		vif->tx.req_cons = ++cons;
	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return work_to_do;
}

static int xenvif_set_skb_gso(struct xenvif *vif,
			      struct sk_buff *skb,
			      struct xen_netif_extra_info *gso)
{
	if (!gso->u.gso.size) {
		netdev_err(vif->dev, "GSO size must not be zero.\n");
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	/* Currently only TCPv4 S.O. is supported. */
	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	skb_shinfo(skb)->gso_size = gso->u.gso.size;
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	/* Header must be checked, and gso_segs computed. */
	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
	skb_shinfo(skb)->gso_segs = 0;

	return 0;
}

static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
{
	struct iphdr *iph;
	int err = -EPROTO;
	int recalculate_partial_csum = 0;

	/*
	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
	 * peers can fail to set NETRXF_csum_blank when sending a GSO
	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
	 * recalculate the partial checksum.
	 */
	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
		vif->rx_gso_checksum_fixup++;
		skb->ip_summed = CHECKSUM_PARTIAL;
		recalculate_partial_csum = 1;
	}

	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	if (skb->protocol != htons(ETH_P_IP))
		goto out;

	iph = (void *)skb->data;
	switch (iph->protocol) {
	case IPPROTO_TCP:
		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
					  offsetof(struct tcphdr, check)))
			goto out;

		if (recalculate_partial_csum) {
			struct tcphdr *tcph = tcp_hdr(skb);
			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
							 skb->len - iph->ihl*4,
							 IPPROTO_TCP, 0);
		}
		break;
	case IPPROTO_UDP:
		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
					  offsetof(struct udphdr, check)))
			goto out;

		if (recalculate_partial_csum) {
			struct udphdr *udph = udp_hdr(skb);
			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
							 skb->len - iph->ihl*4,
							 IPPROTO_UDP, 0);
		}
		break;
	default:
		if (net_ratelimit())
			netdev_err(vif->dev,
				   "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
				   iph->protocol);
		goto out;
	}

	err = 0;

out:
	return err;
}

static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
{
	unsigned long now = jiffies;
	unsigned long next_credit =
		vif->credit_timeout.expires +
		msecs_to_jiffies(vif->credit_usec / 1000);

	/* Timer could already be pending in rare cases. */
	if (timer_pending(&vif->credit_timeout))
		return true;

	/* Passed the point where we can replenish credit? */
	if (time_after_eq(now, next_credit)) {
		vif->credit_timeout.expires = now;
		tx_add_credit(vif);
	}

	/* Still too big to send right now? Set a callback. */
	if (size > vif->remaining_credit) {
		vif->credit_timeout.data     =
			(unsigned long)vif;
		vif->credit_timeout.function =
			tx_credit_callback;
		mod_timer(&vif->credit_timeout,
			  next_credit);

		return true;
	}

	return false;
}

static unsigned xenvif_tx_build_gops(struct xenvif *vif)
{
	struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
	struct sk_buff *skb;
	int ret;

	while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
		< MAX_PENDING_REQS)) {
		struct xen_netif_tx_request txreq;
		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
		struct page *page;
		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
		u16 pending_idx;
		RING_IDX idx;
		int work_to_do;
		unsigned int data_len;
		pending_ring_idx_t index;

		if (vif->tx.sring->req_prod - vif->tx.req_cons >
		    XEN_NETIF_TX_RING_SIZE) {
			netdev_err(vif->dev,
				   "Impossible number of requests. "
				   "req_prod %d, req_cons %d, size %ld\n",
				   vif->tx.sring->req_prod, vif->tx.req_cons,
				   XEN_NETIF_TX_RING_SIZE);
			xenvif_fatal_tx_err(vif);
			continue;
		}

		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
		if (!work_to_do)
			break;

		idx = vif->tx.req_cons;
		rmb(); /* Ensure that we see the request before we copy it. */
		memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));

		/* Credit-based scheduling. */
		if (txreq.size > vif->remaining_credit &&
		    tx_credit_exceeded(vif, txreq.size))
			break;

		vif->remaining_credit -= txreq.size;

		work_to_do--;
		vif->tx.req_cons = ++idx;

		memset(extras, 0, sizeof(extras));
		if (txreq.flags & XEN_NETTXF_extra_info) {
			work_to_do = xenvif_get_extras(vif, extras,
						       work_to_do);
			idx = vif->tx.req_cons;
			if (unlikely(work_to_do < 0))
				break;
		}

		ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do);
		if (unlikely(ret < 0))
			break;

		idx += ret;

		if (unlikely(txreq.size < ETH_HLEN)) {
			netdev_dbg(vif->dev,
				   "Bad packet size: %d\n", txreq.size);
			xenvif_tx_err(vif, &txreq, idx);
			break;
		}

		/* No crossing a page boundary as the payload mustn't fragment. */
		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
			netdev_err(vif->dev,
				   "txreq.offset: %x, size: %u, end: %lu\n",
				   txreq.offset, txreq.size,
				   (txreq.offset&~PAGE_MASK) + txreq.size);
			xenvif_fatal_tx_err(vif);
			break;
		}

		index = pending_index(vif->pending_cons);
		pending_idx = vif->pending_ring[index];

		data_len = (txreq.size > PKT_PROT_LEN &&
			    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
			PKT_PROT_LEN : txreq.size;

		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
				GFP_ATOMIC | __GFP_NOWARN);
		if (unlikely(skb == NULL)) {
			netdev_dbg(vif->dev,
				   "Can't allocate a skb in start_xmit.\n");
			xenvif_tx_err(vif, &txreq, idx);
			break;
		}

		/* Packets passed to netif_rx() must have some headroom. */
		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);

		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
			struct xen_netif_extra_info *gso;
			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

			if (xenvif_set_skb_gso(vif, skb, gso)) {
				/* Failure in xenvif_set_skb_gso is fatal. */
				kfree_skb(skb);
				break;
			}
		}

		/* XXX could copy straight to head */
		page = xenvif_alloc_page(vif, pending_idx);
		if (!page) {
			kfree_skb(skb);
			xenvif_tx_err(vif, &txreq, idx);
			break;
		}

		gop->source.u.ref = txreq.gref;
		gop->source.domid = vif->domid;
		gop->source.offset = txreq.offset;

		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
		gop->dest.domid = DOMID_SELF;
		gop->dest.offset = txreq.offset;

		gop->len = txreq.size;
		gop->flags = GNTCOPY_source_gref;

		gop++;

		memcpy(&vif->pending_tx_info[pending_idx].req,
		       &txreq, sizeof(txreq));
		vif->pending_tx_info[pending_idx].head = index;
		*((u16 *)skb->data) = pending_idx;

		__skb_put(skb, data_len);

		skb_shinfo(skb)->nr_frags = ret;
		if (data_len < txreq.size) {
			skb_shinfo(skb)->nr_frags++;
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     pending_idx);
		} else {
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     INVALID_PENDING_IDX);
		}

		vif->pending_cons++;

		request_gop = xenvif_get_requests(vif, skb, txfrags, gop);
		if (request_gop == NULL) {
			kfree_skb(skb);
			xenvif_tx_err(vif, &txreq, idx);
			break;
		}
		gop = request_gop;

		__skb_queue_tail(&vif->tx_queue, skb);

		vif->tx.req_cons = idx;

		if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops))
			break;
	}

	return gop - vif->tx_copy_ops;
}


static int xenvif_tx_submit(struct xenvif *vif, int budget)
{
	struct gnttab_copy *gop = vif->tx_copy_ops;
	struct sk_buff *skb;
	int work_done = 0;

	while (work_done < budget &&
	       (skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
		struct xen_netif_tx_request *txp;
		u16 pending_idx;
		unsigned data_len;

		pending_idx = *((u16 *)skb->data);
		txp = &vif->pending_tx_info[pending_idx].req;

		/* Check the remap error code. */
		if (unlikely(xenvif_tx_check_gop(vif, skb, &gop))) {
			netdev_dbg(vif->dev, "netback grant failed.\n");
			skb_shinfo(skb)->nr_frags = 0;
			kfree_skb(skb);
			continue;
		}

		data_len = skb->len;
		memcpy(skb->data,
		       (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset),
		       data_len);
		if (data_len < txp->size) {
			/* Append the packet payload as a fragment. */
			txp->offset += data_len;
			txp->size -= data_len;
		} else {
			/* Schedule a response immediately. */
			xenvif_idx_release(vif, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		if (txp->flags & XEN_NETTXF_csum_blank)
			skb->ip_summed = CHECKSUM_PARTIAL;
		else if (txp->flags & XEN_NETTXF_data_validated)
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		xenvif_fill_frags(vif, skb);

		/*
		 * If the initial fragment was < PKT_PROT_LEN then
		 * pull through some bytes from the other fragments to
		 * increase the linear region to PKT_PROT_LEN bytes.
		 */
		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
			int target = min_t(int, skb->len, PKT_PROT_LEN);
			__pskb_pull_tail(skb, target - skb_headlen(skb));
		}

		skb->dev      = vif->dev;
		skb->protocol = eth_type_trans(skb, skb->dev);
		skb_reset_network_header(skb);

		if (checksum_setup(vif, skb)) {
			netdev_dbg(vif->dev,
				   "Can't setup checksum in net_tx_action\n");
			kfree_skb(skb);
			continue;
		}

		skb_probe_transport_header(skb, 0);

		vif->dev->stats.rx_bytes += skb->len;
		vif->dev->stats.rx_packets++;

		work_done++;

		netif_receive_skb(skb);
	}

	return work_done;
}

/* Called after netfront has transmitted */
int xenvif_tx_action(struct xenvif *vif, int budget)
{
	unsigned nr_gops;
	int work_done;

	if (unlikely(!tx_work_todo(vif)))
		return 0;

	nr_gops = xenvif_tx_build_gops(vif);

	if (nr_gops == 0)
		return 0;

	gnttab_batch_copy(vif->tx_copy_ops, nr_gops);

	work_done = xenvif_tx_submit(vif, nr_gops);

	return work_done;
}

static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
			       u8 status)
{
	struct pending_tx_info *pending_tx_info;
	pending_ring_idx_t head;
	u16 peek; /* peek into next tx request */

	BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL));

	/* Already complete? */
	if (vif->mmap_pages[pending_idx] == NULL)
		return;

	pending_tx_info = &vif->pending_tx_info[pending_idx];

	head = pending_tx_info->head;

	BUG_ON(!pending_tx_is_head(vif, head));
	BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx);

	do {
		pending_ring_idx_t index;
		pending_ring_idx_t idx = pending_index(head);
		u16 info_idx = vif->pending_ring[idx];

		pending_tx_info = &vif->pending_tx_info[info_idx];
		make_tx_response(vif, &pending_tx_info->req, status);

		/* Setting any number other than
		 * INVALID_PENDING_RING_IDX indicates this slot is
		 * starting a new packet / ending a previous packet.
		 */
		pending_tx_info->head = 0;

		index = pending_index(vif->pending_prod++);
		vif->pending_ring[index] = vif->pending_ring[info_idx];

		peek = vif->pending_ring[pending_index(++head)];

	} while (!pending_tx_is_head(vif, peek));

	put_page(vif->mmap_pages[pending_idx]);
	vif->mmap_pages[pending_idx] = NULL;
}


static void make_tx_response(struct xenvif *vif,
			     struct xen_netif_tx_request *txp,
			     s8 st)
{
	RING_IDX i = vif->tx.rsp_prod_pvt;
	struct xen_netif_tx_response *resp;
	int notify;

	resp = RING_GET_RESPONSE(&vif->tx, i);
	resp->id     = txp->id;
	resp->status = st;

	if (txp->flags & XEN_NETTXF_extra_info)
		RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;

	vif->tx.rsp_prod_pvt = ++i;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
	if (notify)
		notify_remote_via_irq(vif->tx_irq);
}

static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
						      u16 id,
						      s8 st,
						      u16 offset,
						      u16 size,
						      u16 flags)
{
	RING_IDX i = vif->rx.rsp_prod_pvt;
	struct xen_netif_rx_response *resp;

	resp = RING_GET_RESPONSE(&vif->rx, i);
	resp->offset     = offset;
	resp->flags      = flags;
	resp->id         = id;
	resp->status     = (s16)size;
	if (st < 0)
		resp->status = (s16)st;

	vif->rx.rsp_prod_pvt = ++i;

	return resp;
}

static inline int rx_work_todo(struct xenvif *vif)
{
	return !skb_queue_empty(&vif->rx_queue);
}

static inline int tx_work_todo(struct xenvif *vif)
{

	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) &&
	    (nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
	     < MAX_PENDING_REQS))
		return 1;

	return 0;
}

void xenvif_unmap_frontend_rings(struct xenvif *vif)
{
	if (vif->tx.sring)
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
					vif->tx.sring);
	if (vif->rx.sring)
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
					vif->rx.sring);
}

int xenvif_map_frontend_rings(struct xenvif *vif,
			      grant_ref_t tx_ring_ref,
			      grant_ref_t rx_ring_ref)
{
	void *addr;
	struct xen_netif_tx_sring *txs;
	struct xen_netif_rx_sring *rxs;

	int err = -ENOMEM;

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
				     tx_ring_ref, &addr);
	if (err)
		goto err;

	txs = (struct xen_netif_tx_sring *)addr;
	BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
				     rx_ring_ref, &addr);
	if (err)
		goto err;

	rxs = (struct xen_netif_rx_sring *)addr;
	BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);

	vif->rx_req_cons_peek = 0;

	return 0;

err:
	xenvif_unmap_frontend_rings(vif);
	return err;
}

int xenvif_kthread(void *data)
{
	struct xenvif *vif = data;

	while (!kthread_should_stop()) {
		wait_event_interruptible(vif->wq,
					 rx_work_todo(vif) ||
					 kthread_should_stop());
		if (kthread_should_stop())
			break;

		if (rx_work_todo(vif))
			xenvif_rx_action(vif);

		cond_resched();
	}

	return 0;
}

static int __init netback_init(void)
{
	int rc = 0;

	if (!xen_domain())
		return -ENODEV;

	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", 1632 fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX); 1633 fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX; 1634 } 1635 1636 rc = xenvif_xenbus_init(); 1637 if (rc) 1638 goto failed_init; 1639 1640 return 0; 1641 1642 failed_init: 1643 return rc; 1644 } 1645 1646 module_init(netback_init); 1647 1648 static void __exit netback_fini(void) 1649 { 1650 xenvif_xenbus_fini(); 1651 } 1652 module_exit(netback_fini); 1653 1654 MODULE_LICENSE("Dual BSD/GPL"); 1655 MODULE_ALIAS("xen-backend:vif"); 1656