1 /* 2 * Back-end of the driver for virtual network devices. This portion of the 3 * driver exports a 'unified' network-device interface that can be accessed 4 * by any operating system that implements a compatible front end. A 5 * reference front-end implementation can be found in: 6 * drivers/net/xen-netfront.c 7 * 8 * Copyright (c) 2002-2005, K A Fraser 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License version 2 12 * as published by the Free Software Foundation; or, when distributed 13 * separately from the Linux kernel or incorporated into other 14 * software packages, subject to the following license: 15 * 16 * Permission is hereby granted, free of charge, to any person obtaining a copy 17 * of this source file (the "Software"), to deal in the Software without 18 * restriction, including without limitation the rights to use, copy, modify, 19 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 20 * and to permit persons to whom the Software is furnished to do so, subject to 21 * the following conditions: 22 * 23 * The above copyright notice and this permission notice shall be included in 24 * all copies or substantial portions of the Software. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 32 * IN THE SOFTWARE. 33 */ 34 35 #include "common.h" 36 37 #include <linux/kthread.h> 38 #include <linux/if_vlan.h> 39 #include <linux/udp.h> 40 41 #include <net/tcp.h> 42 #include <net/ip6_checksum.h> 43 44 #include <xen/xen.h> 45 #include <xen/events.h> 46 #include <xen/interface/memory.h> 47 48 #include <asm/xen/hypercall.h> 49 #include <asm/xen/page.h> 50 51 /* Provide an option to disable split event channels at load time as 52 * event channels are limited resource. Split event channels are 53 * enabled by default. 54 */ 55 bool separate_tx_rx_irq = 1; 56 module_param(separate_tx_rx_irq, bool, 0644); 57 58 /* 59 * This is the maximum slots a skb can have. If a guest sends a skb 60 * which exceeds this limit it is considered malicious. 61 */ 62 #define FATAL_SKB_SLOTS_DEFAULT 20 63 static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT; 64 module_param(fatal_skb_slots, uint, 0444); 65 66 /* 67 * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating 68 * the maximum slots a valid packet can use. Now this value is defined 69 * to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to be supported by 70 * all backend. 71 */ 72 #define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN 73 74 /* 75 * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of 76 * one or more merged tx requests, otherwise it is the continuation of 77 * previous tx request. 78 */ 79 static inline int pending_tx_is_head(struct xenvif *vif, RING_IDX idx) 80 { 81 return vif->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX; 82 } 83 84 static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx, 85 u8 status); 86 87 static void make_tx_response(struct xenvif *vif, 88 struct xen_netif_tx_request *txp, 89 s8 st); 90 91 static inline int tx_work_todo(struct xenvif *vif); 92 static inline int rx_work_todo(struct xenvif *vif); 93 94 static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif, 95 u16 id, 96 s8 st, 97 u16 offset, 98 u16 size, 99 u16 flags); 100 101 static inline unsigned long idx_to_pfn(struct xenvif *vif, 102 u16 idx) 103 { 104 return page_to_pfn(vif->mmap_pages[idx]); 105 } 106 107 static inline unsigned long idx_to_kaddr(struct xenvif *vif, 108 u16 idx) 109 { 110 return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx)); 111 } 112 113 /* This is a miniumum size for the linear area to avoid lots of 114 * calls to __pskb_pull_tail() as we set up checksum offsets. The 115 * value 128 was chosen as it covers all IPv4 and most likely 116 * IPv6 headers. 117 */ 118 #define PKT_PROT_LEN 128 119 120 static u16 frag_get_pending_idx(skb_frag_t *frag) 121 { 122 return (u16)frag->page_offset; 123 } 124 125 static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx) 126 { 127 frag->page_offset = pending_idx; 128 } 129 130 static inline pending_ring_idx_t pending_index(unsigned i) 131 { 132 return i & (MAX_PENDING_REQS-1); 133 } 134 135 static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif) 136 { 137 return MAX_PENDING_REQS - 138 vif->pending_prod + vif->pending_cons; 139 } 140 141 static int max_required_rx_slots(struct xenvif *vif) 142 { 143 int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE); 144 145 /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */ 146 if (vif->can_sg || vif->gso_mask || vif->gso_prefix_mask) 147 max += MAX_SKB_FRAGS + 1; /* extra_info + frags */ 148 149 return max; 150 } 151 152 int xenvif_rx_ring_full(struct xenvif *vif) 153 { 154 RING_IDX peek = vif->rx_req_cons_peek; 155 RING_IDX needed = max_required_rx_slots(vif); 156 157 return ((vif->rx.sring->req_prod - peek) < needed) || 158 ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed); 159 } 160 161 int xenvif_must_stop_queue(struct xenvif *vif) 162 { 163 if (!xenvif_rx_ring_full(vif)) 164 return 0; 165 166 vif->rx.sring->req_event = vif->rx_req_cons_peek + 167 max_required_rx_slots(vif); 168 mb(); /* request notification /then/ check the queue */ 169 170 return xenvif_rx_ring_full(vif); 171 } 172 173 /* 174 * Returns true if we should start a new receive buffer instead of 175 * adding 'size' bytes to a buffer which currently contains 'offset' 176 * bytes. 177 */ 178 static bool start_new_rx_buffer(int offset, unsigned long size, int head) 179 { 180 /* simple case: we have completely filled the current buffer. */ 181 if (offset == MAX_BUFFER_OFFSET) 182 return true; 183 184 /* 185 * complex case: start a fresh buffer if the current frag 186 * would overflow the current buffer but only if: 187 * (i) this frag would fit completely in the next buffer 188 * and (ii) there is already some data in the current buffer 189 * and (iii) this is not the head buffer. 190 * 191 * Where: 192 * - (i) stops us splitting a frag into two copies 193 * unless the frag is too large for a single buffer. 194 * - (ii) stops us from leaving a buffer pointlessly empty. 195 * - (iii) stops us leaving the first buffer 196 * empty. Strictly speaking this is already covered 197 * by (ii) but is explicitly checked because 198 * netfront relies on the first buffer being 199 * non-empty and can crash otherwise. 200 * 201 * This means we will effectively linearise small 202 * frags but do not needlessly split large buffers 203 * into multiple copies tend to give large frags their 204 * own buffers as before. 205 */ 206 if ((offset + size > MAX_BUFFER_OFFSET) && 207 (size <= MAX_BUFFER_OFFSET) && offset && !head) 208 return true; 209 210 return false; 211 } 212 213 struct xenvif_count_slot_state { 214 unsigned long copy_off; 215 bool head; 216 }; 217 218 unsigned int xenvif_count_frag_slots(struct xenvif *vif, 219 unsigned long offset, unsigned long size, 220 struct xenvif_count_slot_state *state) 221 { 222 unsigned count = 0; 223 224 offset &= ~PAGE_MASK; 225 226 while (size > 0) { 227 unsigned long bytes; 228 229 bytes = PAGE_SIZE - offset; 230 231 if (bytes > size) 232 bytes = size; 233 234 if (start_new_rx_buffer(state->copy_off, bytes, state->head)) { 235 count++; 236 state->copy_off = 0; 237 } 238 239 if (state->copy_off + bytes > MAX_BUFFER_OFFSET) 240 bytes = MAX_BUFFER_OFFSET - state->copy_off; 241 242 state->copy_off += bytes; 243 244 offset += bytes; 245 size -= bytes; 246 247 if (offset == PAGE_SIZE) 248 offset = 0; 249 250 state->head = false; 251 } 252 253 return count; 254 } 255 256 /* 257 * Figure out how many ring slots we're going to need to send @skb to 258 * the guest. This function is essentially a dry run of 259 * xenvif_gop_frag_copy. 260 */ 261 unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb) 262 { 263 struct xenvif_count_slot_state state; 264 unsigned int count; 265 unsigned char *data; 266 unsigned i; 267 268 state.head = true; 269 state.copy_off = 0; 270 271 /* Slot for the first (partial) page of data. */ 272 count = 1; 273 274 /* Need a slot for the GSO prefix for GSO extra data? */ 275 if (skb_shinfo(skb)->gso_size) 276 count++; 277 278 data = skb->data; 279 while (data < skb_tail_pointer(skb)) { 280 unsigned long offset = offset_in_page(data); 281 unsigned long size = PAGE_SIZE - offset; 282 283 if (data + size > skb_tail_pointer(skb)) 284 size = skb_tail_pointer(skb) - data; 285 286 count += xenvif_count_frag_slots(vif, offset, size, &state); 287 288 data += size; 289 } 290 291 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 292 unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]); 293 unsigned long offset = skb_shinfo(skb)->frags[i].page_offset; 294 295 count += xenvif_count_frag_slots(vif, offset, size, &state); 296 } 297 return count; 298 } 299 300 struct netrx_pending_operations { 301 unsigned copy_prod, copy_cons; 302 unsigned meta_prod, meta_cons; 303 struct gnttab_copy *copy; 304 struct xenvif_rx_meta *meta; 305 int copy_off; 306 grant_ref_t copy_gref; 307 }; 308 309 static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif, 310 struct netrx_pending_operations *npo) 311 { 312 struct xenvif_rx_meta *meta; 313 struct xen_netif_rx_request *req; 314 315 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); 316 317 meta = npo->meta + npo->meta_prod++; 318 meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; 319 meta->gso_size = 0; 320 meta->size = 0; 321 meta->id = req->id; 322 323 npo->copy_off = 0; 324 npo->copy_gref = req->gref; 325 326 return meta; 327 } 328 329 /* 330 * Set up the grant operations for this fragment. If it's a flipping 331 * interface, we also set up the unmap request from here. 332 */ 333 static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, 334 struct netrx_pending_operations *npo, 335 struct page *page, unsigned long size, 336 unsigned long offset, int *head) 337 { 338 struct gnttab_copy *copy_gop; 339 struct xenvif_rx_meta *meta; 340 unsigned long bytes; 341 int gso_type; 342 343 /* Data must not cross a page boundary. */ 344 BUG_ON(size + offset > PAGE_SIZE<<compound_order(page)); 345 346 meta = npo->meta + npo->meta_prod - 1; 347 348 /* Skip unused frames from start of page */ 349 page += offset >> PAGE_SHIFT; 350 offset &= ~PAGE_MASK; 351 352 while (size > 0) { 353 BUG_ON(offset >= PAGE_SIZE); 354 BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); 355 356 bytes = PAGE_SIZE - offset; 357 358 if (bytes > size) 359 bytes = size; 360 361 if (start_new_rx_buffer(npo->copy_off, bytes, *head)) { 362 /* 363 * Netfront requires there to be some data in the head 364 * buffer. 365 */ 366 BUG_ON(*head); 367 368 meta = get_next_rx_buffer(vif, npo); 369 } 370 371 if (npo->copy_off + bytes > MAX_BUFFER_OFFSET) 372 bytes = MAX_BUFFER_OFFSET - npo->copy_off; 373 374 copy_gop = npo->copy + npo->copy_prod++; 375 copy_gop->flags = GNTCOPY_dest_gref; 376 copy_gop->len = bytes; 377 378 copy_gop->source.domid = DOMID_SELF; 379 copy_gop->source.u.gmfn = virt_to_mfn(page_address(page)); 380 copy_gop->source.offset = offset; 381 382 copy_gop->dest.domid = vif->domid; 383 copy_gop->dest.offset = npo->copy_off; 384 copy_gop->dest.u.ref = npo->copy_gref; 385 386 npo->copy_off += bytes; 387 meta->size += bytes; 388 389 offset += bytes; 390 size -= bytes; 391 392 /* Next frame */ 393 if (offset == PAGE_SIZE && size) { 394 BUG_ON(!PageCompound(page)); 395 page++; 396 offset = 0; 397 } 398 399 /* Leave a gap for the GSO descriptor. */ 400 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) 401 gso_type = XEN_NETIF_GSO_TYPE_TCPV4; 402 else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) 403 gso_type = XEN_NETIF_GSO_TYPE_TCPV6; 404 else 405 gso_type = XEN_NETIF_GSO_TYPE_NONE; 406 407 if (*head && ((1 << gso_type) & vif->gso_mask)) 408 vif->rx.req_cons++; 409 410 *head = 0; /* There must be something in this buffer now. */ 411 412 } 413 } 414 415 /* 416 * Prepare an SKB to be transmitted to the frontend. 417 * 418 * This function is responsible for allocating grant operations, meta 419 * structures, etc. 420 * 421 * It returns the number of meta structures consumed. The number of 422 * ring slots used is always equal to the number of meta slots used 423 * plus the number of GSO descriptors used. Currently, we use either 424 * zero GSO descriptors (for non-GSO packets) or one descriptor (for 425 * frontend-side LRO). 426 */ 427 static int xenvif_gop_skb(struct sk_buff *skb, 428 struct netrx_pending_operations *npo) 429 { 430 struct xenvif *vif = netdev_priv(skb->dev); 431 int nr_frags = skb_shinfo(skb)->nr_frags; 432 int i; 433 struct xen_netif_rx_request *req; 434 struct xenvif_rx_meta *meta; 435 unsigned char *data; 436 int head = 1; 437 int old_meta_prod; 438 int gso_type; 439 int gso_size; 440 441 old_meta_prod = npo->meta_prod; 442 443 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { 444 gso_type = XEN_NETIF_GSO_TYPE_TCPV4; 445 gso_size = skb_shinfo(skb)->gso_size; 446 } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { 447 gso_type = XEN_NETIF_GSO_TYPE_TCPV6; 448 gso_size = skb_shinfo(skb)->gso_size; 449 } else { 450 gso_type = XEN_NETIF_GSO_TYPE_NONE; 451 gso_size = 0; 452 } 453 454 /* Set up a GSO prefix descriptor, if necessary */ 455 if ((1 << gso_type) & vif->gso_prefix_mask) { 456 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); 457 meta = npo->meta + npo->meta_prod++; 458 meta->gso_type = gso_type; 459 meta->gso_size = gso_size; 460 meta->size = 0; 461 meta->id = req->id; 462 } 463 464 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); 465 meta = npo->meta + npo->meta_prod++; 466 467 if ((1 << gso_type) & vif->gso_mask) { 468 meta->gso_type = gso_type; 469 meta->gso_size = gso_size; 470 } else { 471 meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; 472 meta->gso_size = 0; 473 } 474 475 meta->size = 0; 476 meta->id = req->id; 477 npo->copy_off = 0; 478 npo->copy_gref = req->gref; 479 480 data = skb->data; 481 while (data < skb_tail_pointer(skb)) { 482 unsigned int offset = offset_in_page(data); 483 unsigned int len = PAGE_SIZE - offset; 484 485 if (data + len > skb_tail_pointer(skb)) 486 len = skb_tail_pointer(skb) - data; 487 488 xenvif_gop_frag_copy(vif, skb, npo, 489 virt_to_page(data), len, offset, &head); 490 data += len; 491 } 492 493 for (i = 0; i < nr_frags; i++) { 494 xenvif_gop_frag_copy(vif, skb, npo, 495 skb_frag_page(&skb_shinfo(skb)->frags[i]), 496 skb_frag_size(&skb_shinfo(skb)->frags[i]), 497 skb_shinfo(skb)->frags[i].page_offset, 498 &head); 499 } 500 501 return npo->meta_prod - old_meta_prod; 502 } 503 504 /* 505 * This is a twin to xenvif_gop_skb. Assume that xenvif_gop_skb was 506 * used to set up the operations on the top of 507 * netrx_pending_operations, which have since been done. Check that 508 * they didn't give any errors and advance over them. 509 */ 510 static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots, 511 struct netrx_pending_operations *npo) 512 { 513 struct gnttab_copy *copy_op; 514 int status = XEN_NETIF_RSP_OKAY; 515 int i; 516 517 for (i = 0; i < nr_meta_slots; i++) { 518 copy_op = npo->copy + npo->copy_cons++; 519 if (copy_op->status != GNTST_okay) { 520 netdev_dbg(vif->dev, 521 "Bad status %d from copy to DOM%d.\n", 522 copy_op->status, vif->domid); 523 status = XEN_NETIF_RSP_ERROR; 524 } 525 } 526 527 return status; 528 } 529 530 static void xenvif_add_frag_responses(struct xenvif *vif, int status, 531 struct xenvif_rx_meta *meta, 532 int nr_meta_slots) 533 { 534 int i; 535 unsigned long offset; 536 537 /* No fragments used */ 538 if (nr_meta_slots <= 1) 539 return; 540 541 nr_meta_slots--; 542 543 for (i = 0; i < nr_meta_slots; i++) { 544 int flags; 545 if (i == nr_meta_slots - 1) 546 flags = 0; 547 else 548 flags = XEN_NETRXF_more_data; 549 550 offset = 0; 551 make_rx_response(vif, meta[i].id, status, offset, 552 meta[i].size, flags); 553 } 554 } 555 556 struct skb_cb_overlay { 557 int meta_slots_used; 558 }; 559 560 static void xenvif_kick_thread(struct xenvif *vif) 561 { 562 wake_up(&vif->wq); 563 } 564 565 void xenvif_rx_action(struct xenvif *vif) 566 { 567 s8 status; 568 u16 flags; 569 struct xen_netif_rx_response *resp; 570 struct sk_buff_head rxq; 571 struct sk_buff *skb; 572 LIST_HEAD(notify); 573 int ret; 574 int nr_frags; 575 int count; 576 unsigned long offset; 577 struct skb_cb_overlay *sco; 578 int need_to_notify = 0; 579 580 struct netrx_pending_operations npo = { 581 .copy = vif->grant_copy_op, 582 .meta = vif->meta, 583 }; 584 585 skb_queue_head_init(&rxq); 586 587 count = 0; 588 589 while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) { 590 vif = netdev_priv(skb->dev); 591 nr_frags = skb_shinfo(skb)->nr_frags; 592 593 sco = (struct skb_cb_overlay *)skb->cb; 594 sco->meta_slots_used = xenvif_gop_skb(skb, &npo); 595 596 count += nr_frags + 1; 597 598 __skb_queue_tail(&rxq, skb); 599 600 /* Filled the batch queue? */ 601 /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */ 602 if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE) 603 break; 604 } 605 606 BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta)); 607 608 if (!npo.copy_prod) 609 return; 610 611 BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS); 612 gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod); 613 614 while ((skb = __skb_dequeue(&rxq)) != NULL) { 615 sco = (struct skb_cb_overlay *)skb->cb; 616 617 vif = netdev_priv(skb->dev); 618 619 if ((1 << vif->meta[npo.meta_cons].gso_type) & 620 vif->gso_prefix_mask) { 621 resp = RING_GET_RESPONSE(&vif->rx, 622 vif->rx.rsp_prod_pvt++); 623 624 resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data; 625 626 resp->offset = vif->meta[npo.meta_cons].gso_size; 627 resp->id = vif->meta[npo.meta_cons].id; 628 resp->status = sco->meta_slots_used; 629 630 npo.meta_cons++; 631 sco->meta_slots_used--; 632 } 633 634 635 vif->dev->stats.tx_bytes += skb->len; 636 vif->dev->stats.tx_packets++; 637 638 status = xenvif_check_gop(vif, sco->meta_slots_used, &npo); 639 640 if (sco->meta_slots_used == 1) 641 flags = 0; 642 else 643 flags = XEN_NETRXF_more_data; 644 645 if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ 646 flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated; 647 else if (skb->ip_summed == CHECKSUM_UNNECESSARY) 648 /* remote but checksummed. */ 649 flags |= XEN_NETRXF_data_validated; 650 651 offset = 0; 652 resp = make_rx_response(vif, vif->meta[npo.meta_cons].id, 653 status, offset, 654 vif->meta[npo.meta_cons].size, 655 flags); 656 657 if ((1 << vif->meta[npo.meta_cons].gso_type) & 658 vif->gso_mask) { 659 struct xen_netif_extra_info *gso = 660 (struct xen_netif_extra_info *) 661 RING_GET_RESPONSE(&vif->rx, 662 vif->rx.rsp_prod_pvt++); 663 664 resp->flags |= XEN_NETRXF_extra_info; 665 666 gso->u.gso.type = vif->meta[npo.meta_cons].gso_type; 667 gso->u.gso.size = vif->meta[npo.meta_cons].gso_size; 668 gso->u.gso.pad = 0; 669 gso->u.gso.features = 0; 670 671 gso->type = XEN_NETIF_EXTRA_TYPE_GSO; 672 gso->flags = 0; 673 } 674 675 xenvif_add_frag_responses(vif, status, 676 vif->meta + npo.meta_cons + 1, 677 sco->meta_slots_used); 678 679 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret); 680 681 if (ret) 682 need_to_notify = 1; 683 684 xenvif_notify_tx_completion(vif); 685 686 npo.meta_cons += sco->meta_slots_used; 687 dev_kfree_skb(skb); 688 } 689 690 if (need_to_notify) 691 notify_remote_via_irq(vif->rx_irq); 692 693 /* More work to do? */ 694 if (!skb_queue_empty(&vif->rx_queue)) 695 xenvif_kick_thread(vif); 696 } 697 698 void xenvif_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb) 699 { 700 skb_queue_tail(&vif->rx_queue, skb); 701 702 xenvif_kick_thread(vif); 703 } 704 705 void xenvif_check_rx_xenvif(struct xenvif *vif) 706 { 707 int more_to_do; 708 709 RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do); 710 711 if (more_to_do) 712 napi_schedule(&vif->napi); 713 } 714 715 static void tx_add_credit(struct xenvif *vif) 716 { 717 unsigned long max_burst, max_credit; 718 719 /* 720 * Allow a burst big enough to transmit a jumbo packet of up to 128kB. 721 * Otherwise the interface can seize up due to insufficient credit. 722 */ 723 max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size; 724 max_burst = min(max_burst, 131072UL); 725 max_burst = max(max_burst, vif->credit_bytes); 726 727 /* Take care that adding a new chunk of credit doesn't wrap to zero. */ 728 max_credit = vif->remaining_credit + vif->credit_bytes; 729 if (max_credit < vif->remaining_credit) 730 max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ 731 732 vif->remaining_credit = min(max_credit, max_burst); 733 } 734 735 static void tx_credit_callback(unsigned long data) 736 { 737 struct xenvif *vif = (struct xenvif *)data; 738 tx_add_credit(vif); 739 xenvif_check_rx_xenvif(vif); 740 } 741 742 static void xenvif_tx_err(struct xenvif *vif, 743 struct xen_netif_tx_request *txp, RING_IDX end) 744 { 745 RING_IDX cons = vif->tx.req_cons; 746 747 do { 748 make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR); 749 if (cons == end) 750 break; 751 txp = RING_GET_REQUEST(&vif->tx, cons++); 752 } while (1); 753 vif->tx.req_cons = cons; 754 } 755 756 static void xenvif_fatal_tx_err(struct xenvif *vif) 757 { 758 netdev_err(vif->dev, "fatal error; disabling device\n"); 759 xenvif_carrier_off(vif); 760 } 761 762 static int xenvif_count_requests(struct xenvif *vif, 763 struct xen_netif_tx_request *first, 764 struct xen_netif_tx_request *txp, 765 int work_to_do) 766 { 767 RING_IDX cons = vif->tx.req_cons; 768 int slots = 0; 769 int drop_err = 0; 770 int more_data; 771 772 if (!(first->flags & XEN_NETTXF_more_data)) 773 return 0; 774 775 do { 776 struct xen_netif_tx_request dropped_tx = { 0 }; 777 778 if (slots >= work_to_do) { 779 netdev_err(vif->dev, 780 "Asked for %d slots but exceeds this limit\n", 781 work_to_do); 782 xenvif_fatal_tx_err(vif); 783 return -ENODATA; 784 } 785 786 /* This guest is really using too many slots and 787 * considered malicious. 788 */ 789 if (unlikely(slots >= fatal_skb_slots)) { 790 netdev_err(vif->dev, 791 "Malicious frontend using %d slots, threshold %u\n", 792 slots, fatal_skb_slots); 793 xenvif_fatal_tx_err(vif); 794 return -E2BIG; 795 } 796 797 /* Xen network protocol had implicit dependency on 798 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to 799 * the historical MAX_SKB_FRAGS value 18 to honor the 800 * same behavior as before. Any packet using more than 801 * 18 slots but less than fatal_skb_slots slots is 802 * dropped 803 */ 804 if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) { 805 if (net_ratelimit()) 806 netdev_dbg(vif->dev, 807 "Too many slots (%d) exceeding limit (%d), dropping packet\n", 808 slots, XEN_NETBK_LEGACY_SLOTS_MAX); 809 drop_err = -E2BIG; 810 } 811 812 if (drop_err) 813 txp = &dropped_tx; 814 815 memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots), 816 sizeof(*txp)); 817 818 /* If the guest submitted a frame >= 64 KiB then 819 * first->size overflowed and following slots will 820 * appear to be larger than the frame. 821 * 822 * This cannot be fatal error as there are buggy 823 * frontends that do this. 824 * 825 * Consume all slots and drop the packet. 826 */ 827 if (!drop_err && txp->size > first->size) { 828 if (net_ratelimit()) 829 netdev_dbg(vif->dev, 830 "Invalid tx request, slot size %u > remaining size %u\n", 831 txp->size, first->size); 832 drop_err = -EIO; 833 } 834 835 first->size -= txp->size; 836 slots++; 837 838 if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { 839 netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n", 840 txp->offset, txp->size); 841 xenvif_fatal_tx_err(vif); 842 return -EINVAL; 843 } 844 845 more_data = txp->flags & XEN_NETTXF_more_data; 846 847 if (!drop_err) 848 txp++; 849 850 } while (more_data); 851 852 if (drop_err) { 853 xenvif_tx_err(vif, first, cons + slots); 854 return drop_err; 855 } 856 857 return slots; 858 } 859 860 static struct page *xenvif_alloc_page(struct xenvif *vif, 861 u16 pending_idx) 862 { 863 struct page *page; 864 865 page = alloc_page(GFP_ATOMIC|__GFP_COLD); 866 if (!page) 867 return NULL; 868 vif->mmap_pages[pending_idx] = page; 869 870 return page; 871 } 872 873 static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif, 874 struct sk_buff *skb, 875 struct xen_netif_tx_request *txp, 876 struct gnttab_copy *gop) 877 { 878 struct skb_shared_info *shinfo = skb_shinfo(skb); 879 skb_frag_t *frags = shinfo->frags; 880 u16 pending_idx = *((u16 *)skb->data); 881 u16 head_idx = 0; 882 int slot, start; 883 struct page *page; 884 pending_ring_idx_t index, start_idx = 0; 885 uint16_t dst_offset; 886 unsigned int nr_slots; 887 struct pending_tx_info *first = NULL; 888 889 /* At this point shinfo->nr_frags is in fact the number of 890 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. 891 */ 892 nr_slots = shinfo->nr_frags; 893 894 /* Skip first skb fragment if it is on same page as header fragment. */ 895 start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); 896 897 /* Coalesce tx requests, at this point the packet passed in 898 * should be <= 64K. Any packets larger than 64K have been 899 * handled in xenvif_count_requests(). 900 */ 901 for (shinfo->nr_frags = slot = start; slot < nr_slots; 902 shinfo->nr_frags++) { 903 struct pending_tx_info *pending_tx_info = 904 vif->pending_tx_info; 905 906 page = alloc_page(GFP_ATOMIC|__GFP_COLD); 907 if (!page) 908 goto err; 909 910 dst_offset = 0; 911 first = NULL; 912 while (dst_offset < PAGE_SIZE && slot < nr_slots) { 913 gop->flags = GNTCOPY_source_gref; 914 915 gop->source.u.ref = txp->gref; 916 gop->source.domid = vif->domid; 917 gop->source.offset = txp->offset; 918 919 gop->dest.domid = DOMID_SELF; 920 921 gop->dest.offset = dst_offset; 922 gop->dest.u.gmfn = virt_to_mfn(page_address(page)); 923 924 if (dst_offset + txp->size > PAGE_SIZE) { 925 /* This page can only merge a portion 926 * of tx request. Do not increment any 927 * pointer / counter here. The txp 928 * will be dealt with in future 929 * rounds, eventually hitting the 930 * `else` branch. 931 */ 932 gop->len = PAGE_SIZE - dst_offset; 933 txp->offset += gop->len; 934 txp->size -= gop->len; 935 dst_offset += gop->len; /* quit loop */ 936 } else { 937 /* This tx request can be merged in the page */ 938 gop->len = txp->size; 939 dst_offset += gop->len; 940 941 index = pending_index(vif->pending_cons++); 942 943 pending_idx = vif->pending_ring[index]; 944 945 memcpy(&pending_tx_info[pending_idx].req, txp, 946 sizeof(*txp)); 947 948 /* Poison these fields, corresponding 949 * fields for head tx req will be set 950 * to correct values after the loop. 951 */ 952 vif->mmap_pages[pending_idx] = (void *)(~0UL); 953 pending_tx_info[pending_idx].head = 954 INVALID_PENDING_RING_IDX; 955 956 if (!first) { 957 first = &pending_tx_info[pending_idx]; 958 start_idx = index; 959 head_idx = pending_idx; 960 } 961 962 txp++; 963 slot++; 964 } 965 966 gop++; 967 } 968 969 first->req.offset = 0; 970 first->req.size = dst_offset; 971 first->head = start_idx; 972 vif->mmap_pages[head_idx] = page; 973 frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx); 974 } 975 976 BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS); 977 978 return gop; 979 err: 980 /* Unwind, freeing all pages and sending error responses. */ 981 while (shinfo->nr_frags-- > start) { 982 xenvif_idx_release(vif, 983 frag_get_pending_idx(&frags[shinfo->nr_frags]), 984 XEN_NETIF_RSP_ERROR); 985 } 986 /* The head too, if necessary. */ 987 if (start) 988 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); 989 990 return NULL; 991 } 992 993 static int xenvif_tx_check_gop(struct xenvif *vif, 994 struct sk_buff *skb, 995 struct gnttab_copy **gopp) 996 { 997 struct gnttab_copy *gop = *gopp; 998 u16 pending_idx = *((u16 *)skb->data); 999 struct skb_shared_info *shinfo = skb_shinfo(skb); 1000 struct pending_tx_info *tx_info; 1001 int nr_frags = shinfo->nr_frags; 1002 int i, err, start; 1003 u16 peek; /* peek into next tx request */ 1004 1005 /* Check status of header. */ 1006 err = gop->status; 1007 if (unlikely(err)) 1008 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); 1009 1010 /* Skip first skb fragment if it is on same page as header fragment. */ 1011 start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); 1012 1013 for (i = start; i < nr_frags; i++) { 1014 int j, newerr; 1015 pending_ring_idx_t head; 1016 1017 pending_idx = frag_get_pending_idx(&shinfo->frags[i]); 1018 tx_info = &vif->pending_tx_info[pending_idx]; 1019 head = tx_info->head; 1020 1021 /* Check error status: if okay then remember grant handle. */ 1022 do { 1023 newerr = (++gop)->status; 1024 if (newerr) 1025 break; 1026 peek = vif->pending_ring[pending_index(++head)]; 1027 } while (!pending_tx_is_head(vif, peek)); 1028 1029 if (likely(!newerr)) { 1030 /* Had a previous error? Invalidate this fragment. */ 1031 if (unlikely(err)) 1032 xenvif_idx_release(vif, pending_idx, 1033 XEN_NETIF_RSP_OKAY); 1034 continue; 1035 } 1036 1037 /* Error on this fragment: respond to client with an error. */ 1038 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); 1039 1040 /* Not the first error? Preceding frags already invalidated. */ 1041 if (err) 1042 continue; 1043 1044 /* First error: invalidate header and preceding fragments. */ 1045 pending_idx = *((u16 *)skb->data); 1046 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY); 1047 for (j = start; j < i; j++) { 1048 pending_idx = frag_get_pending_idx(&shinfo->frags[j]); 1049 xenvif_idx_release(vif, pending_idx, 1050 XEN_NETIF_RSP_OKAY); 1051 } 1052 1053 /* Remember the error: invalidate all subsequent fragments. */ 1054 err = newerr; 1055 } 1056 1057 *gopp = gop + 1; 1058 return err; 1059 } 1060 1061 static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb) 1062 { 1063 struct skb_shared_info *shinfo = skb_shinfo(skb); 1064 int nr_frags = shinfo->nr_frags; 1065 int i; 1066 1067 for (i = 0; i < nr_frags; i++) { 1068 skb_frag_t *frag = shinfo->frags + i; 1069 struct xen_netif_tx_request *txp; 1070 struct page *page; 1071 u16 pending_idx; 1072 1073 pending_idx = frag_get_pending_idx(frag); 1074 1075 txp = &vif->pending_tx_info[pending_idx].req; 1076 page = virt_to_page(idx_to_kaddr(vif, pending_idx)); 1077 __skb_fill_page_desc(skb, i, page, txp->offset, txp->size); 1078 skb->len += txp->size; 1079 skb->data_len += txp->size; 1080 skb->truesize += txp->size; 1081 1082 /* Take an extra reference to offset xenvif_idx_release */ 1083 get_page(vif->mmap_pages[pending_idx]); 1084 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY); 1085 } 1086 } 1087 1088 static int xenvif_get_extras(struct xenvif *vif, 1089 struct xen_netif_extra_info *extras, 1090 int work_to_do) 1091 { 1092 struct xen_netif_extra_info extra; 1093 RING_IDX cons = vif->tx.req_cons; 1094 1095 do { 1096 if (unlikely(work_to_do-- <= 0)) { 1097 netdev_err(vif->dev, "Missing extra info\n"); 1098 xenvif_fatal_tx_err(vif); 1099 return -EBADR; 1100 } 1101 1102 memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons), 1103 sizeof(extra)); 1104 if (unlikely(!extra.type || 1105 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { 1106 vif->tx.req_cons = ++cons; 1107 netdev_err(vif->dev, 1108 "Invalid extra type: %d\n", extra.type); 1109 xenvif_fatal_tx_err(vif); 1110 return -EINVAL; 1111 } 1112 1113 memcpy(&extras[extra.type - 1], &extra, sizeof(extra)); 1114 vif->tx.req_cons = ++cons; 1115 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); 1116 1117 return work_to_do; 1118 } 1119 1120 static int xenvif_set_skb_gso(struct xenvif *vif, 1121 struct sk_buff *skb, 1122 struct xen_netif_extra_info *gso) 1123 { 1124 if (!gso->u.gso.size) { 1125 netdev_err(vif->dev, "GSO size must not be zero.\n"); 1126 xenvif_fatal_tx_err(vif); 1127 return -EINVAL; 1128 } 1129 1130 switch (gso->u.gso.type) { 1131 case XEN_NETIF_GSO_TYPE_TCPV4: 1132 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; 1133 break; 1134 case XEN_NETIF_GSO_TYPE_TCPV6: 1135 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; 1136 break; 1137 default: 1138 netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type); 1139 xenvif_fatal_tx_err(vif); 1140 return -EINVAL; 1141 } 1142 1143 skb_shinfo(skb)->gso_size = gso->u.gso.size; 1144 1145 /* Header must be checked, and gso_segs computed. */ 1146 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; 1147 skb_shinfo(skb)->gso_segs = 0; 1148 1149 return 0; 1150 } 1151 1152 static inline int maybe_pull_tail(struct sk_buff *skb, unsigned int len, 1153 unsigned int max) 1154 { 1155 if (skb_headlen(skb) >= len) 1156 return 0; 1157 1158 /* If we need to pullup then pullup to the max, so we 1159 * won't need to do it again. 1160 */ 1161 if (max > skb->len) 1162 max = skb->len; 1163 1164 if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL) 1165 return -ENOMEM; 1166 1167 if (skb_headlen(skb) < len) 1168 return -EPROTO; 1169 1170 return 0; 1171 } 1172 1173 /* This value should be large enough to cover a tagged ethernet header plus 1174 * maximally sized IP and TCP or UDP headers. 1175 */ 1176 #define MAX_IP_HDR_LEN 128 1177 1178 static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, 1179 int recalculate_partial_csum) 1180 { 1181 unsigned int off; 1182 bool fragment; 1183 int err; 1184 1185 fragment = false; 1186 1187 err = maybe_pull_tail(skb, 1188 sizeof(struct iphdr), 1189 MAX_IP_HDR_LEN); 1190 if (err < 0) 1191 goto out; 1192 1193 if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF)) 1194 fragment = true; 1195 1196 off = ip_hdrlen(skb); 1197 1198 err = -EPROTO; 1199 1200 if (fragment) 1201 goto out; 1202 1203 switch (ip_hdr(skb)->protocol) { 1204 case IPPROTO_TCP: 1205 err = maybe_pull_tail(skb, 1206 off + sizeof(struct tcphdr), 1207 MAX_IP_HDR_LEN); 1208 if (err < 0) 1209 goto out; 1210 1211 if (!skb_partial_csum_set(skb, off, 1212 offsetof(struct tcphdr, check))) { 1213 err = -EPROTO; 1214 goto out; 1215 } 1216 1217 if (recalculate_partial_csum) 1218 tcp_hdr(skb)->check = 1219 ~csum_tcpudp_magic(ip_hdr(skb)->saddr, 1220 ip_hdr(skb)->daddr, 1221 skb->len - off, 1222 IPPROTO_TCP, 0); 1223 break; 1224 case IPPROTO_UDP: 1225 err = maybe_pull_tail(skb, 1226 off + sizeof(struct udphdr), 1227 MAX_IP_HDR_LEN); 1228 if (err < 0) 1229 goto out; 1230 1231 if (!skb_partial_csum_set(skb, off, 1232 offsetof(struct udphdr, check))) { 1233 err = -EPROTO; 1234 goto out; 1235 } 1236 1237 if (recalculate_partial_csum) 1238 udp_hdr(skb)->check = 1239 ~csum_tcpudp_magic(ip_hdr(skb)->saddr, 1240 ip_hdr(skb)->daddr, 1241 skb->len - off, 1242 IPPROTO_UDP, 0); 1243 break; 1244 default: 1245 goto out; 1246 } 1247 1248 err = 0; 1249 1250 out: 1251 return err; 1252 } 1253 1254 /* This value should be large enough to cover a tagged ethernet header plus 1255 * an IPv6 header, all options, and a maximal TCP or UDP header. 1256 */ 1257 #define MAX_IPV6_HDR_LEN 256 1258 1259 #define OPT_HDR(type, skb, off) \ 1260 (type *)(skb_network_header(skb) + (off)) 1261 1262 static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, 1263 int recalculate_partial_csum) 1264 { 1265 int err; 1266 u8 nexthdr; 1267 unsigned int off; 1268 unsigned int len; 1269 bool fragment; 1270 bool done; 1271 1272 fragment = false; 1273 done = false; 1274 1275 off = sizeof(struct ipv6hdr); 1276 1277 err = maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN); 1278 if (err < 0) 1279 goto out; 1280 1281 nexthdr = ipv6_hdr(skb)->nexthdr; 1282 1283 len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); 1284 while (off <= len && !done) { 1285 switch (nexthdr) { 1286 case IPPROTO_DSTOPTS: 1287 case IPPROTO_HOPOPTS: 1288 case IPPROTO_ROUTING: { 1289 struct ipv6_opt_hdr *hp; 1290 1291 err = maybe_pull_tail(skb, 1292 off + 1293 sizeof(struct ipv6_opt_hdr), 1294 MAX_IPV6_HDR_LEN); 1295 if (err < 0) 1296 goto out; 1297 1298 hp = OPT_HDR(struct ipv6_opt_hdr, skb, off); 1299 nexthdr = hp->nexthdr; 1300 off += ipv6_optlen(hp); 1301 break; 1302 } 1303 case IPPROTO_AH: { 1304 struct ip_auth_hdr *hp; 1305 1306 err = maybe_pull_tail(skb, 1307 off + 1308 sizeof(struct ip_auth_hdr), 1309 MAX_IPV6_HDR_LEN); 1310 if (err < 0) 1311 goto out; 1312 1313 hp = OPT_HDR(struct ip_auth_hdr, skb, off); 1314 nexthdr = hp->nexthdr; 1315 off += ipv6_authlen(hp); 1316 break; 1317 } 1318 case IPPROTO_FRAGMENT: { 1319 struct frag_hdr *hp; 1320 1321 err = maybe_pull_tail(skb, 1322 off + 1323 sizeof(struct frag_hdr), 1324 MAX_IPV6_HDR_LEN); 1325 if (err < 0) 1326 goto out; 1327 1328 hp = OPT_HDR(struct frag_hdr, skb, off); 1329 1330 if (hp->frag_off & htons(IP6_OFFSET | IP6_MF)) 1331 fragment = true; 1332 1333 nexthdr = hp->nexthdr; 1334 off += sizeof(struct frag_hdr); 1335 break; 1336 } 1337 default: 1338 done = true; 1339 break; 1340 } 1341 } 1342 1343 err = -EPROTO; 1344 1345 if (!done || fragment) 1346 goto out; 1347 1348 switch (nexthdr) { 1349 case IPPROTO_TCP: 1350 err = maybe_pull_tail(skb, 1351 off + sizeof(struct tcphdr), 1352 MAX_IPV6_HDR_LEN); 1353 if (err < 0) 1354 goto out; 1355 1356 if (!skb_partial_csum_set(skb, off, 1357 offsetof(struct tcphdr, check))) { 1358 err = -EPROTO; 1359 goto out; 1360 } 1361 1362 if (recalculate_partial_csum) 1363 tcp_hdr(skb)->check = 1364 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 1365 &ipv6_hdr(skb)->daddr, 1366 skb->len - off, 1367 IPPROTO_TCP, 0); 1368 break; 1369 case IPPROTO_UDP: 1370 err = maybe_pull_tail(skb, 1371 off + sizeof(struct udphdr), 1372 MAX_IPV6_HDR_LEN); 1373 if (err < 0) 1374 goto out; 1375 1376 if (!skb_partial_csum_set(skb, off, 1377 offsetof(struct udphdr, check))) { 1378 err = -EPROTO; 1379 goto out; 1380 } 1381 1382 if (recalculate_partial_csum) 1383 udp_hdr(skb)->check = 1384 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 1385 &ipv6_hdr(skb)->daddr, 1386 skb->len - off, 1387 IPPROTO_UDP, 0); 1388 break; 1389 default: 1390 goto out; 1391 } 1392 1393 err = 0; 1394 1395 out: 1396 return err; 1397 } 1398 1399 static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) 1400 { 1401 int err = -EPROTO; 1402 int recalculate_partial_csum = 0; 1403 1404 /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy 1405 * peers can fail to set NETRXF_csum_blank when sending a GSO 1406 * frame. In this case force the SKB to CHECKSUM_PARTIAL and 1407 * recalculate the partial checksum. 1408 */ 1409 if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { 1410 vif->rx_gso_checksum_fixup++; 1411 skb->ip_summed = CHECKSUM_PARTIAL; 1412 recalculate_partial_csum = 1; 1413 } 1414 1415 /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ 1416 if (skb->ip_summed != CHECKSUM_PARTIAL) 1417 return 0; 1418 1419 if (skb->protocol == htons(ETH_P_IP)) 1420 err = checksum_setup_ip(vif, skb, recalculate_partial_csum); 1421 else if (skb->protocol == htons(ETH_P_IPV6)) 1422 err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum); 1423 1424 return err; 1425 } 1426 1427 static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) 1428 { 1429 u64 now = get_jiffies_64(); 1430 u64 next_credit = vif->credit_window_start + 1431 msecs_to_jiffies(vif->credit_usec / 1000); 1432 1433 /* Timer could already be pending in rare cases. */ 1434 if (timer_pending(&vif->credit_timeout)) 1435 return true; 1436 1437 /* Passed the point where we can replenish credit? */ 1438 if (time_after_eq64(now, next_credit)) { 1439 vif->credit_window_start = now; 1440 tx_add_credit(vif); 1441 } 1442 1443 /* Still too big to send right now? Set a callback. */ 1444 if (size > vif->remaining_credit) { 1445 vif->credit_timeout.data = 1446 (unsigned long)vif; 1447 vif->credit_timeout.function = 1448 tx_credit_callback; 1449 mod_timer(&vif->credit_timeout, 1450 next_credit); 1451 vif->credit_window_start = next_credit; 1452 1453 return true; 1454 } 1455 1456 return false; 1457 } 1458 1459 static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget) 1460 { 1461 struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop; 1462 struct sk_buff *skb; 1463 int ret; 1464 1465 while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX 1466 < MAX_PENDING_REQS) && 1467 (skb_queue_len(&vif->tx_queue) < budget)) { 1468 struct xen_netif_tx_request txreq; 1469 struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX]; 1470 struct page *page; 1471 struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; 1472 u16 pending_idx; 1473 RING_IDX idx; 1474 int work_to_do; 1475 unsigned int data_len; 1476 pending_ring_idx_t index; 1477 1478 if (vif->tx.sring->req_prod - vif->tx.req_cons > 1479 XEN_NETIF_TX_RING_SIZE) { 1480 netdev_err(vif->dev, 1481 "Impossible number of requests. " 1482 "req_prod %d, req_cons %d, size %ld\n", 1483 vif->tx.sring->req_prod, vif->tx.req_cons, 1484 XEN_NETIF_TX_RING_SIZE); 1485 xenvif_fatal_tx_err(vif); 1486 continue; 1487 } 1488 1489 work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx); 1490 if (!work_to_do) 1491 break; 1492 1493 idx = vif->tx.req_cons; 1494 rmb(); /* Ensure that we see the request before we copy it. */ 1495 memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq)); 1496 1497 /* Credit-based scheduling. */ 1498 if (txreq.size > vif->remaining_credit && 1499 tx_credit_exceeded(vif, txreq.size)) 1500 break; 1501 1502 vif->remaining_credit -= txreq.size; 1503 1504 work_to_do--; 1505 vif->tx.req_cons = ++idx; 1506 1507 memset(extras, 0, sizeof(extras)); 1508 if (txreq.flags & XEN_NETTXF_extra_info) { 1509 work_to_do = xenvif_get_extras(vif, extras, 1510 work_to_do); 1511 idx = vif->tx.req_cons; 1512 if (unlikely(work_to_do < 0)) 1513 break; 1514 } 1515 1516 ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do); 1517 if (unlikely(ret < 0)) 1518 break; 1519 1520 idx += ret; 1521 1522 if (unlikely(txreq.size < ETH_HLEN)) { 1523 netdev_dbg(vif->dev, 1524 "Bad packet size: %d\n", txreq.size); 1525 xenvif_tx_err(vif, &txreq, idx); 1526 break; 1527 } 1528 1529 /* No crossing a page as the payload mustn't fragment. */ 1530 if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { 1531 netdev_err(vif->dev, 1532 "txreq.offset: %x, size: %u, end: %lu\n", 1533 txreq.offset, txreq.size, 1534 (txreq.offset&~PAGE_MASK) + txreq.size); 1535 xenvif_fatal_tx_err(vif); 1536 break; 1537 } 1538 1539 index = pending_index(vif->pending_cons); 1540 pending_idx = vif->pending_ring[index]; 1541 1542 data_len = (txreq.size > PKT_PROT_LEN && 1543 ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? 1544 PKT_PROT_LEN : txreq.size; 1545 1546 skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN, 1547 GFP_ATOMIC | __GFP_NOWARN); 1548 if (unlikely(skb == NULL)) { 1549 netdev_dbg(vif->dev, 1550 "Can't allocate a skb in start_xmit.\n"); 1551 xenvif_tx_err(vif, &txreq, idx); 1552 break; 1553 } 1554 1555 /* Packets passed to netif_rx() must have some headroom. */ 1556 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); 1557 1558 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { 1559 struct xen_netif_extra_info *gso; 1560 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; 1561 1562 if (xenvif_set_skb_gso(vif, skb, gso)) { 1563 /* Failure in xenvif_set_skb_gso is fatal. */ 1564 kfree_skb(skb); 1565 break; 1566 } 1567 } 1568 1569 /* XXX could copy straight to head */ 1570 page = xenvif_alloc_page(vif, pending_idx); 1571 if (!page) { 1572 kfree_skb(skb); 1573 xenvif_tx_err(vif, &txreq, idx); 1574 break; 1575 } 1576 1577 gop->source.u.ref = txreq.gref; 1578 gop->source.domid = vif->domid; 1579 gop->source.offset = txreq.offset; 1580 1581 gop->dest.u.gmfn = virt_to_mfn(page_address(page)); 1582 gop->dest.domid = DOMID_SELF; 1583 gop->dest.offset = txreq.offset; 1584 1585 gop->len = txreq.size; 1586 gop->flags = GNTCOPY_source_gref; 1587 1588 gop++; 1589 1590 memcpy(&vif->pending_tx_info[pending_idx].req, 1591 &txreq, sizeof(txreq)); 1592 vif->pending_tx_info[pending_idx].head = index; 1593 *((u16 *)skb->data) = pending_idx; 1594 1595 __skb_put(skb, data_len); 1596 1597 skb_shinfo(skb)->nr_frags = ret; 1598 if (data_len < txreq.size) { 1599 skb_shinfo(skb)->nr_frags++; 1600 frag_set_pending_idx(&skb_shinfo(skb)->frags[0], 1601 pending_idx); 1602 } else { 1603 frag_set_pending_idx(&skb_shinfo(skb)->frags[0], 1604 INVALID_PENDING_IDX); 1605 } 1606 1607 vif->pending_cons++; 1608 1609 request_gop = xenvif_get_requests(vif, skb, txfrags, gop); 1610 if (request_gop == NULL) { 1611 kfree_skb(skb); 1612 xenvif_tx_err(vif, &txreq, idx); 1613 break; 1614 } 1615 gop = request_gop; 1616 1617 __skb_queue_tail(&vif->tx_queue, skb); 1618 1619 vif->tx.req_cons = idx; 1620 1621 if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops)) 1622 break; 1623 } 1624 1625 return gop - vif->tx_copy_ops; 1626 } 1627 1628 1629 static int xenvif_tx_submit(struct xenvif *vif) 1630 { 1631 struct gnttab_copy *gop = vif->tx_copy_ops; 1632 struct sk_buff *skb; 1633 int work_done = 0; 1634 1635 while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) { 1636 struct xen_netif_tx_request *txp; 1637 u16 pending_idx; 1638 unsigned data_len; 1639 1640 pending_idx = *((u16 *)skb->data); 1641 txp = &vif->pending_tx_info[pending_idx].req; 1642 1643 /* Check the remap error code. */ 1644 if (unlikely(xenvif_tx_check_gop(vif, skb, &gop))) { 1645 netdev_dbg(vif->dev, "netback grant failed.\n"); 1646 skb_shinfo(skb)->nr_frags = 0; 1647 kfree_skb(skb); 1648 continue; 1649 } 1650 1651 data_len = skb->len; 1652 memcpy(skb->data, 1653 (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset), 1654 data_len); 1655 if (data_len < txp->size) { 1656 /* Append the packet payload as a fragment. */ 1657 txp->offset += data_len; 1658 txp->size -= data_len; 1659 } else { 1660 /* Schedule a response immediately. */ 1661 xenvif_idx_release(vif, pending_idx, 1662 XEN_NETIF_RSP_OKAY); 1663 } 1664 1665 if (txp->flags & XEN_NETTXF_csum_blank) 1666 skb->ip_summed = CHECKSUM_PARTIAL; 1667 else if (txp->flags & XEN_NETTXF_data_validated) 1668 skb->ip_summed = CHECKSUM_UNNECESSARY; 1669 1670 xenvif_fill_frags(vif, skb); 1671 1672 if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) { 1673 int target = min_t(int, skb->len, PKT_PROT_LEN); 1674 __pskb_pull_tail(skb, target - skb_headlen(skb)); 1675 } 1676 1677 skb->dev = vif->dev; 1678 skb->protocol = eth_type_trans(skb, skb->dev); 1679 skb_reset_network_header(skb); 1680 1681 if (checksum_setup(vif, skb)) { 1682 netdev_dbg(vif->dev, 1683 "Can't setup checksum in net_tx_action\n"); 1684 kfree_skb(skb); 1685 continue; 1686 } 1687 1688 skb_probe_transport_header(skb, 0); 1689 1690 vif->dev->stats.rx_bytes += skb->len; 1691 vif->dev->stats.rx_packets++; 1692 1693 work_done++; 1694 1695 netif_receive_skb(skb); 1696 } 1697 1698 return work_done; 1699 } 1700 1701 /* Called after netfront has transmitted */ 1702 int xenvif_tx_action(struct xenvif *vif, int budget) 1703 { 1704 unsigned nr_gops; 1705 int work_done; 1706 1707 if (unlikely(!tx_work_todo(vif))) 1708 return 0; 1709 1710 nr_gops = xenvif_tx_build_gops(vif, budget); 1711 1712 if (nr_gops == 0) 1713 return 0; 1714 1715 gnttab_batch_copy(vif->tx_copy_ops, nr_gops); 1716 1717 work_done = xenvif_tx_submit(vif); 1718 1719 return work_done; 1720 } 1721 1722 static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx, 1723 u8 status) 1724 { 1725 struct pending_tx_info *pending_tx_info; 1726 pending_ring_idx_t head; 1727 u16 peek; /* peek into next tx request */ 1728 1729 BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL)); 1730 1731 /* Already complete? */ 1732 if (vif->mmap_pages[pending_idx] == NULL) 1733 return; 1734 1735 pending_tx_info = &vif->pending_tx_info[pending_idx]; 1736 1737 head = pending_tx_info->head; 1738 1739 BUG_ON(!pending_tx_is_head(vif, head)); 1740 BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx); 1741 1742 do { 1743 pending_ring_idx_t index; 1744 pending_ring_idx_t idx = pending_index(head); 1745 u16 info_idx = vif->pending_ring[idx]; 1746 1747 pending_tx_info = &vif->pending_tx_info[info_idx]; 1748 make_tx_response(vif, &pending_tx_info->req, status); 1749 1750 /* Setting any number other than 1751 * INVALID_PENDING_RING_IDX indicates this slot is 1752 * starting a new packet / ending a previous packet. 1753 */ 1754 pending_tx_info->head = 0; 1755 1756 index = pending_index(vif->pending_prod++); 1757 vif->pending_ring[index] = vif->pending_ring[info_idx]; 1758 1759 peek = vif->pending_ring[pending_index(++head)]; 1760 1761 } while (!pending_tx_is_head(vif, peek)); 1762 1763 put_page(vif->mmap_pages[pending_idx]); 1764 vif->mmap_pages[pending_idx] = NULL; 1765 } 1766 1767 1768 static void make_tx_response(struct xenvif *vif, 1769 struct xen_netif_tx_request *txp, 1770 s8 st) 1771 { 1772 RING_IDX i = vif->tx.rsp_prod_pvt; 1773 struct xen_netif_tx_response *resp; 1774 int notify; 1775 1776 resp = RING_GET_RESPONSE(&vif->tx, i); 1777 resp->id = txp->id; 1778 resp->status = st; 1779 1780 if (txp->flags & XEN_NETTXF_extra_info) 1781 RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL; 1782 1783 vif->tx.rsp_prod_pvt = ++i; 1784 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify); 1785 if (notify) 1786 notify_remote_via_irq(vif->tx_irq); 1787 } 1788 1789 static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif, 1790 u16 id, 1791 s8 st, 1792 u16 offset, 1793 u16 size, 1794 u16 flags) 1795 { 1796 RING_IDX i = vif->rx.rsp_prod_pvt; 1797 struct xen_netif_rx_response *resp; 1798 1799 resp = RING_GET_RESPONSE(&vif->rx, i); 1800 resp->offset = offset; 1801 resp->flags = flags; 1802 resp->id = id; 1803 resp->status = (s16)size; 1804 if (st < 0) 1805 resp->status = (s16)st; 1806 1807 vif->rx.rsp_prod_pvt = ++i; 1808 1809 return resp; 1810 } 1811 1812 static inline int rx_work_todo(struct xenvif *vif) 1813 { 1814 return !skb_queue_empty(&vif->rx_queue); 1815 } 1816 1817 static inline int tx_work_todo(struct xenvif *vif) 1818 { 1819 1820 if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) && 1821 (nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX 1822 < MAX_PENDING_REQS)) 1823 return 1; 1824 1825 return 0; 1826 } 1827 1828 void xenvif_unmap_frontend_rings(struct xenvif *vif) 1829 { 1830 if (vif->tx.sring) 1831 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif), 1832 vif->tx.sring); 1833 if (vif->rx.sring) 1834 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif), 1835 vif->rx.sring); 1836 } 1837 1838 int xenvif_map_frontend_rings(struct xenvif *vif, 1839 grant_ref_t tx_ring_ref, 1840 grant_ref_t rx_ring_ref) 1841 { 1842 void *addr; 1843 struct xen_netif_tx_sring *txs; 1844 struct xen_netif_rx_sring *rxs; 1845 1846 int err = -ENOMEM; 1847 1848 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), 1849 tx_ring_ref, &addr); 1850 if (err) 1851 goto err; 1852 1853 txs = (struct xen_netif_tx_sring *)addr; 1854 BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE); 1855 1856 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), 1857 rx_ring_ref, &addr); 1858 if (err) 1859 goto err; 1860 1861 rxs = (struct xen_netif_rx_sring *)addr; 1862 BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE); 1863 1864 vif->rx_req_cons_peek = 0; 1865 1866 return 0; 1867 1868 err: 1869 xenvif_unmap_frontend_rings(vif); 1870 return err; 1871 } 1872 1873 int xenvif_kthread(void *data) 1874 { 1875 struct xenvif *vif = data; 1876 1877 while (!kthread_should_stop()) { 1878 wait_event_interruptible(vif->wq, 1879 rx_work_todo(vif) || 1880 kthread_should_stop()); 1881 if (kthread_should_stop()) 1882 break; 1883 1884 if (rx_work_todo(vif)) 1885 xenvif_rx_action(vif); 1886 1887 cond_resched(); 1888 } 1889 1890 return 0; 1891 } 1892 1893 static int __init netback_init(void) 1894 { 1895 int rc = 0; 1896 1897 if (!xen_domain()) 1898 return -ENODEV; 1899 1900 if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) { 1901 pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", 1902 fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX); 1903 fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX; 1904 } 1905 1906 rc = xenvif_xenbus_init(); 1907 if (rc) 1908 goto failed_init; 1909 1910 return 0; 1911 1912 failed_init: 1913 return rc; 1914 } 1915 1916 module_init(netback_init); 1917 1918 static void __exit netback_fini(void) 1919 { 1920 xenvif_xenbus_fini(); 1921 } 1922 module_exit(netback_fini); 1923 1924 MODULE_LICENSE("Dual BSD/GPL"); 1925 MODULE_ALIAS("xen-backend:vif"); 1926