1 /* 2 * Back-end of the driver for virtual network devices. This portion of the 3 * driver exports a 'unified' network-device interface that can be accessed 4 * by any operating system that implements a compatible front end. A 5 * reference front-end implementation can be found in: 6 * drivers/net/xen-netfront.c 7 * 8 * Copyright (c) 2002-2005, K A Fraser 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License version 2 12 * as published by the Free Software Foundation; or, when distributed 13 * separately from the Linux kernel or incorporated into other 14 * software packages, subject to the following license: 15 * 16 * Permission is hereby granted, free of charge, to any person obtaining a copy 17 * of this source file (the "Software"), to deal in the Software without 18 * restriction, including without limitation the rights to use, copy, modify, 19 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 20 * and to permit persons to whom the Software is furnished to do so, subject to 21 * the following conditions: 22 * 23 * The above copyright notice and this permission notice shall be included in 24 * all copies or substantial portions of the Software. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 32 * IN THE SOFTWARE. 33 */ 34 35 #include "common.h" 36 37 #include <linux/kthread.h> 38 #include <linux/if_vlan.h> 39 #include <linux/udp.h> 40 41 #include <net/tcp.h> 42 #include <net/ip6_checksum.h> 43 44 #include <xen/xen.h> 45 #include <xen/events.h> 46 #include <xen/interface/memory.h> 47 48 #include <asm/xen/hypercall.h> 49 #include <asm/xen/page.h> 50 51 /* Provide an option to disable split event channels at load time as 52 * event channels are limited resource. Split event channels are 53 * enabled by default. 54 */ 55 bool separate_tx_rx_irq = 1; 56 module_param(separate_tx_rx_irq, bool, 0644); 57 58 /* 59 * This is the maximum slots a skb can have. If a guest sends a skb 60 * which exceeds this limit it is considered malicious. 61 */ 62 #define FATAL_SKB_SLOTS_DEFAULT 20 63 static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT; 64 module_param(fatal_skb_slots, uint, 0444); 65 66 /* 67 * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating 68 * the maximum slots a valid packet can use. Now this value is defined 69 * to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to be supported by 70 * all backend. 71 */ 72 #define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN 73 74 /* 75 * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of 76 * one or more merged tx requests, otherwise it is the continuation of 77 * previous tx request. 78 */ 79 static inline int pending_tx_is_head(struct xenvif *vif, RING_IDX idx) 80 { 81 return vif->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX; 82 } 83 84 static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx, 85 u8 status); 86 87 static void make_tx_response(struct xenvif *vif, 88 struct xen_netif_tx_request *txp, 89 s8 st); 90 91 static inline int tx_work_todo(struct xenvif *vif); 92 static inline int rx_work_todo(struct xenvif *vif); 93 94 static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif, 95 u16 id, 96 s8 st, 97 u16 offset, 98 u16 size, 99 u16 flags); 100 101 static inline unsigned long idx_to_pfn(struct xenvif *vif, 102 u16 idx) 103 { 104 return page_to_pfn(vif->mmap_pages[idx]); 105 } 106 107 static inline unsigned long idx_to_kaddr(struct xenvif *vif, 108 u16 idx) 109 { 110 return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx)); 111 } 112 113 /* This is a miniumum size for the linear area to avoid lots of 114 * calls to __pskb_pull_tail() as we set up checksum offsets. The 115 * value 128 was chosen as it covers all IPv4 and most likely 116 * IPv6 headers. 117 */ 118 #define PKT_PROT_LEN 128 119 120 static u16 frag_get_pending_idx(skb_frag_t *frag) 121 { 122 return (u16)frag->page_offset; 123 } 124 125 static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx) 126 { 127 frag->page_offset = pending_idx; 128 } 129 130 static inline pending_ring_idx_t pending_index(unsigned i) 131 { 132 return i & (MAX_PENDING_REQS-1); 133 } 134 135 static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif) 136 { 137 return MAX_PENDING_REQS - 138 vif->pending_prod + vif->pending_cons; 139 } 140 141 static int max_required_rx_slots(struct xenvif *vif) 142 { 143 int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE); 144 145 /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */ 146 if (vif->can_sg || vif->gso_mask || vif->gso_prefix_mask) 147 max += MAX_SKB_FRAGS + 1; /* extra_info + frags */ 148 149 return max; 150 } 151 152 int xenvif_rx_ring_full(struct xenvif *vif) 153 { 154 RING_IDX peek = vif->rx_req_cons_peek; 155 RING_IDX needed = max_required_rx_slots(vif); 156 157 return ((vif->rx.sring->req_prod - peek) < needed) || 158 ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed); 159 } 160 161 int xenvif_must_stop_queue(struct xenvif *vif) 162 { 163 if (!xenvif_rx_ring_full(vif)) 164 return 0; 165 166 vif->rx.sring->req_event = vif->rx_req_cons_peek + 167 max_required_rx_slots(vif); 168 mb(); /* request notification /then/ check the queue */ 169 170 return xenvif_rx_ring_full(vif); 171 } 172 173 /* 174 * Returns true if we should start a new receive buffer instead of 175 * adding 'size' bytes to a buffer which currently contains 'offset' 176 * bytes. 177 */ 178 static bool start_new_rx_buffer(int offset, unsigned long size, int head) 179 { 180 /* simple case: we have completely filled the current buffer. */ 181 if (offset == MAX_BUFFER_OFFSET) 182 return true; 183 184 /* 185 * complex case: start a fresh buffer if the current frag 186 * would overflow the current buffer but only if: 187 * (i) this frag would fit completely in the next buffer 188 * and (ii) there is already some data in the current buffer 189 * and (iii) this is not the head buffer. 190 * 191 * Where: 192 * - (i) stops us splitting a frag into two copies 193 * unless the frag is too large for a single buffer. 194 * - (ii) stops us from leaving a buffer pointlessly empty. 195 * - (iii) stops us leaving the first buffer 196 * empty. Strictly speaking this is already covered 197 * by (ii) but is explicitly checked because 198 * netfront relies on the first buffer being 199 * non-empty and can crash otherwise. 200 * 201 * This means we will effectively linearise small 202 * frags but do not needlessly split large buffers 203 * into multiple copies tend to give large frags their 204 * own buffers as before. 205 */ 206 if ((offset + size > MAX_BUFFER_OFFSET) && 207 (size <= MAX_BUFFER_OFFSET) && offset && !head) 208 return true; 209 210 return false; 211 } 212 213 struct xenvif_count_slot_state { 214 unsigned long copy_off; 215 bool head; 216 }; 217 218 unsigned int xenvif_count_frag_slots(struct xenvif *vif, 219 unsigned long offset, unsigned long size, 220 struct xenvif_count_slot_state *state) 221 { 222 unsigned count = 0; 223 224 offset &= ~PAGE_MASK; 225 226 while (size > 0) { 227 unsigned long bytes; 228 229 bytes = PAGE_SIZE - offset; 230 231 if (bytes > size) 232 bytes = size; 233 234 if (start_new_rx_buffer(state->copy_off, bytes, state->head)) { 235 count++; 236 state->copy_off = 0; 237 } 238 239 if (state->copy_off + bytes > MAX_BUFFER_OFFSET) 240 bytes = MAX_BUFFER_OFFSET - state->copy_off; 241 242 state->copy_off += bytes; 243 244 offset += bytes; 245 size -= bytes; 246 247 if (offset == PAGE_SIZE) 248 offset = 0; 249 250 state->head = false; 251 } 252 253 return count; 254 } 255 256 /* 257 * Figure out how many ring slots we're going to need to send @skb to 258 * the guest. This function is essentially a dry run of 259 * xenvif_gop_frag_copy. 260 */ 261 unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb) 262 { 263 struct xenvif_count_slot_state state; 264 unsigned int count; 265 unsigned char *data; 266 unsigned i; 267 268 state.head = true; 269 state.copy_off = 0; 270 271 /* Slot for the first (partial) page of data. */ 272 count = 1; 273 274 /* Need a slot for the GSO prefix for GSO extra data? */ 275 if (skb_shinfo(skb)->gso_size) 276 count++; 277 278 data = skb->data; 279 while (data < skb_tail_pointer(skb)) { 280 unsigned long offset = offset_in_page(data); 281 unsigned long size = PAGE_SIZE - offset; 282 283 if (data + size > skb_tail_pointer(skb)) 284 size = skb_tail_pointer(skb) - data; 285 286 count += xenvif_count_frag_slots(vif, offset, size, &state); 287 288 data += size; 289 } 290 291 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 292 unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]); 293 unsigned long offset = skb_shinfo(skb)->frags[i].page_offset; 294 295 count += xenvif_count_frag_slots(vif, offset, size, &state); 296 } 297 return count; 298 } 299 300 struct netrx_pending_operations { 301 unsigned copy_prod, copy_cons; 302 unsigned meta_prod, meta_cons; 303 struct gnttab_copy *copy; 304 struct xenvif_rx_meta *meta; 305 int copy_off; 306 grant_ref_t copy_gref; 307 }; 308 309 static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif, 310 struct netrx_pending_operations *npo) 311 { 312 struct xenvif_rx_meta *meta; 313 struct xen_netif_rx_request *req; 314 315 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); 316 317 meta = npo->meta + npo->meta_prod++; 318 meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; 319 meta->gso_size = 0; 320 meta->size = 0; 321 meta->id = req->id; 322 323 npo->copy_off = 0; 324 npo->copy_gref = req->gref; 325 326 return meta; 327 } 328 329 /* 330 * Set up the grant operations for this fragment. If it's a flipping 331 * interface, we also set up the unmap request from here. 332 */ 333 static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, 334 struct netrx_pending_operations *npo, 335 struct page *page, unsigned long size, 336 unsigned long offset, int *head) 337 { 338 struct gnttab_copy *copy_gop; 339 struct xenvif_rx_meta *meta; 340 unsigned long bytes; 341 int gso_type; 342 343 /* Data must not cross a page boundary. */ 344 BUG_ON(size + offset > PAGE_SIZE<<compound_order(page)); 345 346 meta = npo->meta + npo->meta_prod - 1; 347 348 /* Skip unused frames from start of page */ 349 page += offset >> PAGE_SHIFT; 350 offset &= ~PAGE_MASK; 351 352 while (size > 0) { 353 BUG_ON(offset >= PAGE_SIZE); 354 BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); 355 356 bytes = PAGE_SIZE - offset; 357 358 if (bytes > size) 359 bytes = size; 360 361 if (start_new_rx_buffer(npo->copy_off, bytes, *head)) { 362 /* 363 * Netfront requires there to be some data in the head 364 * buffer. 365 */ 366 BUG_ON(*head); 367 368 meta = get_next_rx_buffer(vif, npo); 369 } 370 371 if (npo->copy_off + bytes > MAX_BUFFER_OFFSET) 372 bytes = MAX_BUFFER_OFFSET - npo->copy_off; 373 374 copy_gop = npo->copy + npo->copy_prod++; 375 copy_gop->flags = GNTCOPY_dest_gref; 376 copy_gop->len = bytes; 377 378 copy_gop->source.domid = DOMID_SELF; 379 copy_gop->source.u.gmfn = virt_to_mfn(page_address(page)); 380 copy_gop->source.offset = offset; 381 382 copy_gop->dest.domid = vif->domid; 383 copy_gop->dest.offset = npo->copy_off; 384 copy_gop->dest.u.ref = npo->copy_gref; 385 386 npo->copy_off += bytes; 387 meta->size += bytes; 388 389 offset += bytes; 390 size -= bytes; 391 392 /* Next frame */ 393 if (offset == PAGE_SIZE && size) { 394 BUG_ON(!PageCompound(page)); 395 page++; 396 offset = 0; 397 } 398 399 /* Leave a gap for the GSO descriptor. */ 400 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) 401 gso_type = XEN_NETIF_GSO_TYPE_TCPV4; 402 else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) 403 gso_type = XEN_NETIF_GSO_TYPE_TCPV6; 404 else 405 gso_type = XEN_NETIF_GSO_TYPE_NONE; 406 407 if (*head && ((1 << gso_type) & vif->gso_mask)) 408 vif->rx.req_cons++; 409 410 *head = 0; /* There must be something in this buffer now. */ 411 412 } 413 } 414 415 /* 416 * Prepare an SKB to be transmitted to the frontend. 417 * 418 * This function is responsible for allocating grant operations, meta 419 * structures, etc. 420 * 421 * It returns the number of meta structures consumed. The number of 422 * ring slots used is always equal to the number of meta slots used 423 * plus the number of GSO descriptors used. Currently, we use either 424 * zero GSO descriptors (for non-GSO packets) or one descriptor (for 425 * frontend-side LRO). 426 */ 427 static int xenvif_gop_skb(struct sk_buff *skb, 428 struct netrx_pending_operations *npo) 429 { 430 struct xenvif *vif = netdev_priv(skb->dev); 431 int nr_frags = skb_shinfo(skb)->nr_frags; 432 int i; 433 struct xen_netif_rx_request *req; 434 struct xenvif_rx_meta *meta; 435 unsigned char *data; 436 int head = 1; 437 int old_meta_prod; 438 int gso_type; 439 int gso_size; 440 441 old_meta_prod = npo->meta_prod; 442 443 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { 444 gso_type = XEN_NETIF_GSO_TYPE_TCPV4; 445 gso_size = skb_shinfo(skb)->gso_size; 446 } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { 447 gso_type = XEN_NETIF_GSO_TYPE_TCPV6; 448 gso_size = skb_shinfo(skb)->gso_size; 449 } else { 450 gso_type = XEN_NETIF_GSO_TYPE_NONE; 451 gso_size = 0; 452 } 453 454 /* Set up a GSO prefix descriptor, if necessary */ 455 if ((1 << gso_type) & vif->gso_prefix_mask) { 456 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); 457 meta = npo->meta + npo->meta_prod++; 458 meta->gso_type = gso_type; 459 meta->gso_size = gso_size; 460 meta->size = 0; 461 meta->id = req->id; 462 } 463 464 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); 465 meta = npo->meta + npo->meta_prod++; 466 467 if ((1 << gso_type) & vif->gso_mask) { 468 meta->gso_type = gso_type; 469 meta->gso_size = gso_size; 470 } else { 471 meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; 472 meta->gso_size = 0; 473 } 474 475 meta->size = 0; 476 meta->id = req->id; 477 npo->copy_off = 0; 478 npo->copy_gref = req->gref; 479 480 data = skb->data; 481 while (data < skb_tail_pointer(skb)) { 482 unsigned int offset = offset_in_page(data); 483 unsigned int len = PAGE_SIZE - offset; 484 485 if (data + len > skb_tail_pointer(skb)) 486 len = skb_tail_pointer(skb) - data; 487 488 xenvif_gop_frag_copy(vif, skb, npo, 489 virt_to_page(data), len, offset, &head); 490 data += len; 491 } 492 493 for (i = 0; i < nr_frags; i++) { 494 xenvif_gop_frag_copy(vif, skb, npo, 495 skb_frag_page(&skb_shinfo(skb)->frags[i]), 496 skb_frag_size(&skb_shinfo(skb)->frags[i]), 497 skb_shinfo(skb)->frags[i].page_offset, 498 &head); 499 } 500 501 return npo->meta_prod - old_meta_prod; 502 } 503 504 /* 505 * This is a twin to xenvif_gop_skb. Assume that xenvif_gop_skb was 506 * used to set up the operations on the top of 507 * netrx_pending_operations, which have since been done. Check that 508 * they didn't give any errors and advance over them. 509 */ 510 static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots, 511 struct netrx_pending_operations *npo) 512 { 513 struct gnttab_copy *copy_op; 514 int status = XEN_NETIF_RSP_OKAY; 515 int i; 516 517 for (i = 0; i < nr_meta_slots; i++) { 518 copy_op = npo->copy + npo->copy_cons++; 519 if (copy_op->status != GNTST_okay) { 520 netdev_dbg(vif->dev, 521 "Bad status %d from copy to DOM%d.\n", 522 copy_op->status, vif->domid); 523 status = XEN_NETIF_RSP_ERROR; 524 } 525 } 526 527 return status; 528 } 529 530 static void xenvif_add_frag_responses(struct xenvif *vif, int status, 531 struct xenvif_rx_meta *meta, 532 int nr_meta_slots) 533 { 534 int i; 535 unsigned long offset; 536 537 /* No fragments used */ 538 if (nr_meta_slots <= 1) 539 return; 540 541 nr_meta_slots--; 542 543 for (i = 0; i < nr_meta_slots; i++) { 544 int flags; 545 if (i == nr_meta_slots - 1) 546 flags = 0; 547 else 548 flags = XEN_NETRXF_more_data; 549 550 offset = 0; 551 make_rx_response(vif, meta[i].id, status, offset, 552 meta[i].size, flags); 553 } 554 } 555 556 struct skb_cb_overlay { 557 int meta_slots_used; 558 }; 559 560 static void xenvif_kick_thread(struct xenvif *vif) 561 { 562 wake_up(&vif->wq); 563 } 564 565 void xenvif_rx_action(struct xenvif *vif) 566 { 567 s8 status; 568 u16 flags; 569 struct xen_netif_rx_response *resp; 570 struct sk_buff_head rxq; 571 struct sk_buff *skb; 572 LIST_HEAD(notify); 573 int ret; 574 int nr_frags; 575 int count; 576 unsigned long offset; 577 struct skb_cb_overlay *sco; 578 int need_to_notify = 0; 579 580 struct netrx_pending_operations npo = { 581 .copy = vif->grant_copy_op, 582 .meta = vif->meta, 583 }; 584 585 skb_queue_head_init(&rxq); 586 587 count = 0; 588 589 while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) { 590 vif = netdev_priv(skb->dev); 591 nr_frags = skb_shinfo(skb)->nr_frags; 592 593 sco = (struct skb_cb_overlay *)skb->cb; 594 sco->meta_slots_used = xenvif_gop_skb(skb, &npo); 595 596 count += nr_frags + 1; 597 598 __skb_queue_tail(&rxq, skb); 599 600 /* Filled the batch queue? */ 601 /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */ 602 if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE) 603 break; 604 } 605 606 BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta)); 607 608 if (!npo.copy_prod) 609 return; 610 611 BUG_ON(npo.copy_prod > ARRAY_SIZE(vif->grant_copy_op)); 612 gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod); 613 614 while ((skb = __skb_dequeue(&rxq)) != NULL) { 615 sco = (struct skb_cb_overlay *)skb->cb; 616 617 vif = netdev_priv(skb->dev); 618 619 if ((1 << vif->meta[npo.meta_cons].gso_type) & 620 vif->gso_prefix_mask) { 621 resp = RING_GET_RESPONSE(&vif->rx, 622 vif->rx.rsp_prod_pvt++); 623 624 resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data; 625 626 resp->offset = vif->meta[npo.meta_cons].gso_size; 627 resp->id = vif->meta[npo.meta_cons].id; 628 resp->status = sco->meta_slots_used; 629 630 npo.meta_cons++; 631 sco->meta_slots_used--; 632 } 633 634 635 vif->dev->stats.tx_bytes += skb->len; 636 vif->dev->stats.tx_packets++; 637 638 status = xenvif_check_gop(vif, sco->meta_slots_used, &npo); 639 640 if (sco->meta_slots_used == 1) 641 flags = 0; 642 else 643 flags = XEN_NETRXF_more_data; 644 645 if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ 646 flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated; 647 else if (skb->ip_summed == CHECKSUM_UNNECESSARY) 648 /* remote but checksummed. */ 649 flags |= XEN_NETRXF_data_validated; 650 651 offset = 0; 652 resp = make_rx_response(vif, vif->meta[npo.meta_cons].id, 653 status, offset, 654 vif->meta[npo.meta_cons].size, 655 flags); 656 657 if ((1 << vif->meta[npo.meta_cons].gso_type) & 658 vif->gso_mask) { 659 struct xen_netif_extra_info *gso = 660 (struct xen_netif_extra_info *) 661 RING_GET_RESPONSE(&vif->rx, 662 vif->rx.rsp_prod_pvt++); 663 664 resp->flags |= XEN_NETRXF_extra_info; 665 666 gso->u.gso.type = vif->meta[npo.meta_cons].gso_type; 667 gso->u.gso.size = vif->meta[npo.meta_cons].gso_size; 668 gso->u.gso.pad = 0; 669 gso->u.gso.features = 0; 670 671 gso->type = XEN_NETIF_EXTRA_TYPE_GSO; 672 gso->flags = 0; 673 } 674 675 xenvif_add_frag_responses(vif, status, 676 vif->meta + npo.meta_cons + 1, 677 sco->meta_slots_used); 678 679 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret); 680 681 if (ret) 682 need_to_notify = 1; 683 684 xenvif_notify_tx_completion(vif); 685 686 npo.meta_cons += sco->meta_slots_used; 687 dev_kfree_skb(skb); 688 } 689 690 if (need_to_notify) 691 notify_remote_via_irq(vif->rx_irq); 692 693 /* More work to do? */ 694 if (!skb_queue_empty(&vif->rx_queue)) 695 xenvif_kick_thread(vif); 696 } 697 698 void xenvif_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb) 699 { 700 skb_queue_tail(&vif->rx_queue, skb); 701 702 xenvif_kick_thread(vif); 703 } 704 705 void xenvif_check_rx_xenvif(struct xenvif *vif) 706 { 707 int more_to_do; 708 709 RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do); 710 711 if (more_to_do) 712 napi_schedule(&vif->napi); 713 } 714 715 static void tx_add_credit(struct xenvif *vif) 716 { 717 unsigned long max_burst, max_credit; 718 719 /* 720 * Allow a burst big enough to transmit a jumbo packet of up to 128kB. 721 * Otherwise the interface can seize up due to insufficient credit. 722 */ 723 max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size; 724 max_burst = min(max_burst, 131072UL); 725 max_burst = max(max_burst, vif->credit_bytes); 726 727 /* Take care that adding a new chunk of credit doesn't wrap to zero. */ 728 max_credit = vif->remaining_credit + vif->credit_bytes; 729 if (max_credit < vif->remaining_credit) 730 max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ 731 732 vif->remaining_credit = min(max_credit, max_burst); 733 } 734 735 static void tx_credit_callback(unsigned long data) 736 { 737 struct xenvif *vif = (struct xenvif *)data; 738 tx_add_credit(vif); 739 xenvif_check_rx_xenvif(vif); 740 } 741 742 static void xenvif_tx_err(struct xenvif *vif, 743 struct xen_netif_tx_request *txp, RING_IDX end) 744 { 745 RING_IDX cons = vif->tx.req_cons; 746 747 do { 748 make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR); 749 if (cons == end) 750 break; 751 txp = RING_GET_REQUEST(&vif->tx, cons++); 752 } while (1); 753 vif->tx.req_cons = cons; 754 } 755 756 static void xenvif_fatal_tx_err(struct xenvif *vif) 757 { 758 netdev_err(vif->dev, "fatal error; disabling device\n"); 759 xenvif_carrier_off(vif); 760 } 761 762 static int xenvif_count_requests(struct xenvif *vif, 763 struct xen_netif_tx_request *first, 764 struct xen_netif_tx_request *txp, 765 int work_to_do) 766 { 767 RING_IDX cons = vif->tx.req_cons; 768 int slots = 0; 769 int drop_err = 0; 770 int more_data; 771 772 if (!(first->flags & XEN_NETTXF_more_data)) 773 return 0; 774 775 do { 776 struct xen_netif_tx_request dropped_tx = { 0 }; 777 778 if (slots >= work_to_do) { 779 netdev_err(vif->dev, 780 "Asked for %d slots but exceeds this limit\n", 781 work_to_do); 782 xenvif_fatal_tx_err(vif); 783 return -ENODATA; 784 } 785 786 /* This guest is really using too many slots and 787 * considered malicious. 788 */ 789 if (unlikely(slots >= fatal_skb_slots)) { 790 netdev_err(vif->dev, 791 "Malicious frontend using %d slots, threshold %u\n", 792 slots, fatal_skb_slots); 793 xenvif_fatal_tx_err(vif); 794 return -E2BIG; 795 } 796 797 /* Xen network protocol had implicit dependency on 798 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to 799 * the historical MAX_SKB_FRAGS value 18 to honor the 800 * same behavior as before. Any packet using more than 801 * 18 slots but less than fatal_skb_slots slots is 802 * dropped 803 */ 804 if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) { 805 if (net_ratelimit()) 806 netdev_dbg(vif->dev, 807 "Too many slots (%d) exceeding limit (%d), dropping packet\n", 808 slots, XEN_NETBK_LEGACY_SLOTS_MAX); 809 drop_err = -E2BIG; 810 } 811 812 if (drop_err) 813 txp = &dropped_tx; 814 815 memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots), 816 sizeof(*txp)); 817 818 /* If the guest submitted a frame >= 64 KiB then 819 * first->size overflowed and following slots will 820 * appear to be larger than the frame. 821 * 822 * This cannot be fatal error as there are buggy 823 * frontends that do this. 824 * 825 * Consume all slots and drop the packet. 826 */ 827 if (!drop_err && txp->size > first->size) { 828 if (net_ratelimit()) 829 netdev_dbg(vif->dev, 830 "Invalid tx request, slot size %u > remaining size %u\n", 831 txp->size, first->size); 832 drop_err = -EIO; 833 } 834 835 first->size -= txp->size; 836 slots++; 837 838 if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { 839 netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n", 840 txp->offset, txp->size); 841 xenvif_fatal_tx_err(vif); 842 return -EINVAL; 843 } 844 845 more_data = txp->flags & XEN_NETTXF_more_data; 846 847 if (!drop_err) 848 txp++; 849 850 } while (more_data); 851 852 if (drop_err) { 853 xenvif_tx_err(vif, first, cons + slots); 854 return drop_err; 855 } 856 857 return slots; 858 } 859 860 static struct page *xenvif_alloc_page(struct xenvif *vif, 861 u16 pending_idx) 862 { 863 struct page *page; 864 865 page = alloc_page(GFP_ATOMIC|__GFP_COLD); 866 if (!page) 867 return NULL; 868 vif->mmap_pages[pending_idx] = page; 869 870 return page; 871 } 872 873 static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif, 874 struct sk_buff *skb, 875 struct xen_netif_tx_request *txp, 876 struct gnttab_copy *gop) 877 { 878 struct skb_shared_info *shinfo = skb_shinfo(skb); 879 skb_frag_t *frags = shinfo->frags; 880 u16 pending_idx = *((u16 *)skb->data); 881 u16 head_idx = 0; 882 int slot, start; 883 struct page *page; 884 pending_ring_idx_t index, start_idx = 0; 885 uint16_t dst_offset; 886 unsigned int nr_slots; 887 struct pending_tx_info *first = NULL; 888 889 /* At this point shinfo->nr_frags is in fact the number of 890 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. 891 */ 892 nr_slots = shinfo->nr_frags; 893 894 /* Skip first skb fragment if it is on same page as header fragment. */ 895 start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); 896 897 /* Coalesce tx requests, at this point the packet passed in 898 * should be <= 64K. Any packets larger than 64K have been 899 * handled in xenvif_count_requests(). 900 */ 901 for (shinfo->nr_frags = slot = start; slot < nr_slots; 902 shinfo->nr_frags++) { 903 struct pending_tx_info *pending_tx_info = 904 vif->pending_tx_info; 905 906 page = alloc_page(GFP_ATOMIC|__GFP_COLD); 907 if (!page) 908 goto err; 909 910 dst_offset = 0; 911 first = NULL; 912 while (dst_offset < PAGE_SIZE && slot < nr_slots) { 913 gop->flags = GNTCOPY_source_gref; 914 915 gop->source.u.ref = txp->gref; 916 gop->source.domid = vif->domid; 917 gop->source.offset = txp->offset; 918 919 gop->dest.domid = DOMID_SELF; 920 921 gop->dest.offset = dst_offset; 922 gop->dest.u.gmfn = virt_to_mfn(page_address(page)); 923 924 if (dst_offset + txp->size > PAGE_SIZE) { 925 /* This page can only merge a portion 926 * of tx request. Do not increment any 927 * pointer / counter here. The txp 928 * will be dealt with in future 929 * rounds, eventually hitting the 930 * `else` branch. 931 */ 932 gop->len = PAGE_SIZE - dst_offset; 933 txp->offset += gop->len; 934 txp->size -= gop->len; 935 dst_offset += gop->len; /* quit loop */ 936 } else { 937 /* This tx request can be merged in the page */ 938 gop->len = txp->size; 939 dst_offset += gop->len; 940 941 index = pending_index(vif->pending_cons++); 942 943 pending_idx = vif->pending_ring[index]; 944 945 memcpy(&pending_tx_info[pending_idx].req, txp, 946 sizeof(*txp)); 947 948 /* Poison these fields, corresponding 949 * fields for head tx req will be set 950 * to correct values after the loop. 951 */ 952 vif->mmap_pages[pending_idx] = (void *)(~0UL); 953 pending_tx_info[pending_idx].head = 954 INVALID_PENDING_RING_IDX; 955 956 if (!first) { 957 first = &pending_tx_info[pending_idx]; 958 start_idx = index; 959 head_idx = pending_idx; 960 } 961 962 txp++; 963 slot++; 964 } 965 966 gop++; 967 } 968 969 first->req.offset = 0; 970 first->req.size = dst_offset; 971 first->head = start_idx; 972 vif->mmap_pages[head_idx] = page; 973 frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx); 974 } 975 976 BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS); 977 978 return gop; 979 err: 980 /* Unwind, freeing all pages and sending error responses. */ 981 while (shinfo->nr_frags-- > start) { 982 xenvif_idx_release(vif, 983 frag_get_pending_idx(&frags[shinfo->nr_frags]), 984 XEN_NETIF_RSP_ERROR); 985 } 986 /* The head too, if necessary. */ 987 if (start) 988 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); 989 990 return NULL; 991 } 992 993 static int xenvif_tx_check_gop(struct xenvif *vif, 994 struct sk_buff *skb, 995 struct gnttab_copy **gopp) 996 { 997 struct gnttab_copy *gop = *gopp; 998 u16 pending_idx = *((u16 *)skb->data); 999 struct skb_shared_info *shinfo = skb_shinfo(skb); 1000 struct pending_tx_info *tx_info; 1001 int nr_frags = shinfo->nr_frags; 1002 int i, err, start; 1003 u16 peek; /* peek into next tx request */ 1004 1005 /* Check status of header. */ 1006 err = gop->status; 1007 if (unlikely(err)) 1008 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); 1009 1010 /* Skip first skb fragment if it is on same page as header fragment. */ 1011 start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); 1012 1013 for (i = start; i < nr_frags; i++) { 1014 int j, newerr; 1015 pending_ring_idx_t head; 1016 1017 pending_idx = frag_get_pending_idx(&shinfo->frags[i]); 1018 tx_info = &vif->pending_tx_info[pending_idx]; 1019 head = tx_info->head; 1020 1021 /* Check error status: if okay then remember grant handle. */ 1022 do { 1023 newerr = (++gop)->status; 1024 if (newerr) 1025 break; 1026 peek = vif->pending_ring[pending_index(++head)]; 1027 } while (!pending_tx_is_head(vif, peek)); 1028 1029 if (likely(!newerr)) { 1030 /* Had a previous error? Invalidate this fragment. */ 1031 if (unlikely(err)) 1032 xenvif_idx_release(vif, pending_idx, 1033 XEN_NETIF_RSP_OKAY); 1034 continue; 1035 } 1036 1037 /* Error on this fragment: respond to client with an error. */ 1038 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); 1039 1040 /* Not the first error? Preceding frags already invalidated. */ 1041 if (err) 1042 continue; 1043 1044 /* First error: invalidate header and preceding fragments. */ 1045 pending_idx = *((u16 *)skb->data); 1046 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY); 1047 for (j = start; j < i; j++) { 1048 pending_idx = frag_get_pending_idx(&shinfo->frags[j]); 1049 xenvif_idx_release(vif, pending_idx, 1050 XEN_NETIF_RSP_OKAY); 1051 } 1052 1053 /* Remember the error: invalidate all subsequent fragments. */ 1054 err = newerr; 1055 } 1056 1057 *gopp = gop + 1; 1058 return err; 1059 } 1060 1061 static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb) 1062 { 1063 struct skb_shared_info *shinfo = skb_shinfo(skb); 1064 int nr_frags = shinfo->nr_frags; 1065 int i; 1066 1067 for (i = 0; i < nr_frags; i++) { 1068 skb_frag_t *frag = shinfo->frags + i; 1069 struct xen_netif_tx_request *txp; 1070 struct page *page; 1071 u16 pending_idx; 1072 1073 pending_idx = frag_get_pending_idx(frag); 1074 1075 txp = &vif->pending_tx_info[pending_idx].req; 1076 page = virt_to_page(idx_to_kaddr(vif, pending_idx)); 1077 __skb_fill_page_desc(skb, i, page, txp->offset, txp->size); 1078 skb->len += txp->size; 1079 skb->data_len += txp->size; 1080 skb->truesize += txp->size; 1081 1082 /* Take an extra reference to offset xenvif_idx_release */ 1083 get_page(vif->mmap_pages[pending_idx]); 1084 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY); 1085 } 1086 } 1087 1088 static int xenvif_get_extras(struct xenvif *vif, 1089 struct xen_netif_extra_info *extras, 1090 int work_to_do) 1091 { 1092 struct xen_netif_extra_info extra; 1093 RING_IDX cons = vif->tx.req_cons; 1094 1095 do { 1096 if (unlikely(work_to_do-- <= 0)) { 1097 netdev_err(vif->dev, "Missing extra info\n"); 1098 xenvif_fatal_tx_err(vif); 1099 return -EBADR; 1100 } 1101 1102 memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons), 1103 sizeof(extra)); 1104 if (unlikely(!extra.type || 1105 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { 1106 vif->tx.req_cons = ++cons; 1107 netdev_err(vif->dev, 1108 "Invalid extra type: %d\n", extra.type); 1109 xenvif_fatal_tx_err(vif); 1110 return -EINVAL; 1111 } 1112 1113 memcpy(&extras[extra.type - 1], &extra, sizeof(extra)); 1114 vif->tx.req_cons = ++cons; 1115 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); 1116 1117 return work_to_do; 1118 } 1119 1120 static int xenvif_set_skb_gso(struct xenvif *vif, 1121 struct sk_buff *skb, 1122 struct xen_netif_extra_info *gso) 1123 { 1124 if (!gso->u.gso.size) { 1125 netdev_err(vif->dev, "GSO size must not be zero.\n"); 1126 xenvif_fatal_tx_err(vif); 1127 return -EINVAL; 1128 } 1129 1130 switch (gso->u.gso.type) { 1131 case XEN_NETIF_GSO_TYPE_TCPV4: 1132 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; 1133 break; 1134 case XEN_NETIF_GSO_TYPE_TCPV6: 1135 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; 1136 break; 1137 default: 1138 netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type); 1139 xenvif_fatal_tx_err(vif); 1140 return -EINVAL; 1141 } 1142 1143 skb_shinfo(skb)->gso_size = gso->u.gso.size; 1144 1145 /* Header must be checked, and gso_segs computed. */ 1146 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; 1147 skb_shinfo(skb)->gso_segs = 0; 1148 1149 return 0; 1150 } 1151 1152 static inline int maybe_pull_tail(struct sk_buff *skb, unsigned int len, 1153 unsigned int max) 1154 { 1155 if (skb_headlen(skb) >= len) 1156 return 0; 1157 1158 /* If we need to pullup then pullup to the max, so we 1159 * won't need to do it again. 1160 */ 1161 if (max > skb->len) 1162 max = skb->len; 1163 1164 if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL) 1165 return -ENOMEM; 1166 1167 if (skb_headlen(skb) < len) 1168 return -EPROTO; 1169 1170 return 0; 1171 } 1172 1173 /* This value should be large enough to cover a tagged ethernet header plus 1174 * maximally sized IP and TCP or UDP headers. 1175 */ 1176 #define MAX_IP_HDR_LEN 128 1177 1178 static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, 1179 int recalculate_partial_csum) 1180 { 1181 unsigned int off; 1182 bool fragment; 1183 int err; 1184 1185 fragment = false; 1186 1187 err = maybe_pull_tail(skb, 1188 sizeof(struct iphdr), 1189 MAX_IP_HDR_LEN); 1190 if (err < 0) 1191 goto out; 1192 1193 if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF)) 1194 fragment = true; 1195 1196 off = ip_hdrlen(skb); 1197 1198 err = -EPROTO; 1199 1200 switch (ip_hdr(skb)->protocol) { 1201 case IPPROTO_TCP: 1202 err = maybe_pull_tail(skb, 1203 off + sizeof(struct tcphdr), 1204 MAX_IP_HDR_LEN); 1205 if (err < 0) 1206 goto out; 1207 1208 if (!skb_partial_csum_set(skb, off, 1209 offsetof(struct tcphdr, check))) 1210 goto out; 1211 1212 if (recalculate_partial_csum) 1213 tcp_hdr(skb)->check = 1214 ~csum_tcpudp_magic(ip_hdr(skb)->saddr, 1215 ip_hdr(skb)->daddr, 1216 skb->len - off, 1217 IPPROTO_TCP, 0); 1218 break; 1219 case IPPROTO_UDP: 1220 err = maybe_pull_tail(skb, 1221 off + sizeof(struct udphdr), 1222 MAX_IP_HDR_LEN); 1223 if (err < 0) 1224 goto out; 1225 1226 if (!skb_partial_csum_set(skb, off, 1227 offsetof(struct udphdr, check))) 1228 goto out; 1229 1230 if (recalculate_partial_csum) 1231 udp_hdr(skb)->check = 1232 ~csum_tcpudp_magic(ip_hdr(skb)->saddr, 1233 ip_hdr(skb)->daddr, 1234 skb->len - off, 1235 IPPROTO_UDP, 0); 1236 break; 1237 default: 1238 goto out; 1239 } 1240 1241 err = 0; 1242 1243 out: 1244 return err; 1245 } 1246 1247 /* This value should be large enough to cover a tagged ethernet header plus 1248 * an IPv6 header, all options, and a maximal TCP or UDP header. 1249 */ 1250 #define MAX_IPV6_HDR_LEN 256 1251 1252 #define OPT_HDR(type, skb, off) \ 1253 (type *)(skb_network_header(skb) + (off)) 1254 1255 static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, 1256 int recalculate_partial_csum) 1257 { 1258 int err; 1259 u8 nexthdr; 1260 unsigned int off; 1261 unsigned int len; 1262 bool fragment; 1263 bool done; 1264 1265 fragment = false; 1266 done = false; 1267 1268 off = sizeof(struct ipv6hdr); 1269 1270 err = maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN); 1271 if (err < 0) 1272 goto out; 1273 1274 nexthdr = ipv6_hdr(skb)->nexthdr; 1275 1276 len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); 1277 while (off <= len && !done) { 1278 switch (nexthdr) { 1279 case IPPROTO_DSTOPTS: 1280 case IPPROTO_HOPOPTS: 1281 case IPPROTO_ROUTING: { 1282 struct ipv6_opt_hdr *hp; 1283 1284 err = maybe_pull_tail(skb, 1285 off + 1286 sizeof(struct ipv6_opt_hdr), 1287 MAX_IPV6_HDR_LEN); 1288 if (err < 0) 1289 goto out; 1290 1291 hp = OPT_HDR(struct ipv6_opt_hdr, skb, off); 1292 nexthdr = hp->nexthdr; 1293 off += ipv6_optlen(hp); 1294 break; 1295 } 1296 case IPPROTO_AH: { 1297 struct ip_auth_hdr *hp; 1298 1299 err = maybe_pull_tail(skb, 1300 off + 1301 sizeof(struct ip_auth_hdr), 1302 MAX_IPV6_HDR_LEN); 1303 if (err < 0) 1304 goto out; 1305 1306 hp = OPT_HDR(struct ip_auth_hdr, skb, off); 1307 nexthdr = hp->nexthdr; 1308 off += ipv6_authlen(hp); 1309 break; 1310 } 1311 case IPPROTO_FRAGMENT: { 1312 struct frag_hdr *hp; 1313 1314 err = maybe_pull_tail(skb, 1315 off + 1316 sizeof(struct frag_hdr), 1317 MAX_IPV6_HDR_LEN); 1318 if (err < 0) 1319 goto out; 1320 1321 hp = OPT_HDR(struct frag_hdr, skb, off); 1322 1323 if (hp->frag_off & htons(IP6_OFFSET | IP6_MF)) 1324 fragment = true; 1325 1326 nexthdr = hp->nexthdr; 1327 off += sizeof(struct frag_hdr); 1328 break; 1329 } 1330 default: 1331 done = true; 1332 break; 1333 } 1334 } 1335 1336 err = -EPROTO; 1337 1338 if (!done || fragment) 1339 goto out; 1340 1341 switch (nexthdr) { 1342 case IPPROTO_TCP: 1343 err = maybe_pull_tail(skb, 1344 off + sizeof(struct tcphdr), 1345 MAX_IPV6_HDR_LEN); 1346 if (err < 0) 1347 goto out; 1348 1349 if (!skb_partial_csum_set(skb, off, 1350 offsetof(struct tcphdr, check))) 1351 goto out; 1352 1353 if (recalculate_partial_csum) 1354 tcp_hdr(skb)->check = 1355 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 1356 &ipv6_hdr(skb)->daddr, 1357 skb->len - off, 1358 IPPROTO_TCP, 0); 1359 break; 1360 case IPPROTO_UDP: 1361 err = maybe_pull_tail(skb, 1362 off + sizeof(struct udphdr), 1363 MAX_IPV6_HDR_LEN); 1364 if (err < 0) 1365 goto out; 1366 1367 if (!skb_partial_csum_set(skb, off, 1368 offsetof(struct udphdr, check))) 1369 goto out; 1370 1371 if (recalculate_partial_csum) 1372 udp_hdr(skb)->check = 1373 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 1374 &ipv6_hdr(skb)->daddr, 1375 skb->len - off, 1376 IPPROTO_UDP, 0); 1377 break; 1378 default: 1379 goto out; 1380 } 1381 1382 err = 0; 1383 1384 out: 1385 return err; 1386 } 1387 1388 static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) 1389 { 1390 int err = -EPROTO; 1391 int recalculate_partial_csum = 0; 1392 1393 /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy 1394 * peers can fail to set NETRXF_csum_blank when sending a GSO 1395 * frame. In this case force the SKB to CHECKSUM_PARTIAL and 1396 * recalculate the partial checksum. 1397 */ 1398 if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { 1399 vif->rx_gso_checksum_fixup++; 1400 skb->ip_summed = CHECKSUM_PARTIAL; 1401 recalculate_partial_csum = 1; 1402 } 1403 1404 /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ 1405 if (skb->ip_summed != CHECKSUM_PARTIAL) 1406 return 0; 1407 1408 if (skb->protocol == htons(ETH_P_IP)) 1409 err = checksum_setup_ip(vif, skb, recalculate_partial_csum); 1410 else if (skb->protocol == htons(ETH_P_IPV6)) 1411 err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum); 1412 1413 return err; 1414 } 1415 1416 static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) 1417 { 1418 u64 now = get_jiffies_64(); 1419 u64 next_credit = vif->credit_window_start + 1420 msecs_to_jiffies(vif->credit_usec / 1000); 1421 1422 /* Timer could already be pending in rare cases. */ 1423 if (timer_pending(&vif->credit_timeout)) 1424 return true; 1425 1426 /* Passed the point where we can replenish credit? */ 1427 if (time_after_eq64(now, next_credit)) { 1428 vif->credit_window_start = now; 1429 tx_add_credit(vif); 1430 } 1431 1432 /* Still too big to send right now? Set a callback. */ 1433 if (size > vif->remaining_credit) { 1434 vif->credit_timeout.data = 1435 (unsigned long)vif; 1436 vif->credit_timeout.function = 1437 tx_credit_callback; 1438 mod_timer(&vif->credit_timeout, 1439 next_credit); 1440 vif->credit_window_start = next_credit; 1441 1442 return true; 1443 } 1444 1445 return false; 1446 } 1447 1448 static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget) 1449 { 1450 struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop; 1451 struct sk_buff *skb; 1452 int ret; 1453 1454 while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX 1455 < MAX_PENDING_REQS) && 1456 (skb_queue_len(&vif->tx_queue) < budget)) { 1457 struct xen_netif_tx_request txreq; 1458 struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX]; 1459 struct page *page; 1460 struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; 1461 u16 pending_idx; 1462 RING_IDX idx; 1463 int work_to_do; 1464 unsigned int data_len; 1465 pending_ring_idx_t index; 1466 1467 if (vif->tx.sring->req_prod - vif->tx.req_cons > 1468 XEN_NETIF_TX_RING_SIZE) { 1469 netdev_err(vif->dev, 1470 "Impossible number of requests. " 1471 "req_prod %d, req_cons %d, size %ld\n", 1472 vif->tx.sring->req_prod, vif->tx.req_cons, 1473 XEN_NETIF_TX_RING_SIZE); 1474 xenvif_fatal_tx_err(vif); 1475 continue; 1476 } 1477 1478 work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx); 1479 if (!work_to_do) 1480 break; 1481 1482 idx = vif->tx.req_cons; 1483 rmb(); /* Ensure that we see the request before we copy it. */ 1484 memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq)); 1485 1486 /* Credit-based scheduling. */ 1487 if (txreq.size > vif->remaining_credit && 1488 tx_credit_exceeded(vif, txreq.size)) 1489 break; 1490 1491 vif->remaining_credit -= txreq.size; 1492 1493 work_to_do--; 1494 vif->tx.req_cons = ++idx; 1495 1496 memset(extras, 0, sizeof(extras)); 1497 if (txreq.flags & XEN_NETTXF_extra_info) { 1498 work_to_do = xenvif_get_extras(vif, extras, 1499 work_to_do); 1500 idx = vif->tx.req_cons; 1501 if (unlikely(work_to_do < 0)) 1502 break; 1503 } 1504 1505 ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do); 1506 if (unlikely(ret < 0)) 1507 break; 1508 1509 idx += ret; 1510 1511 if (unlikely(txreq.size < ETH_HLEN)) { 1512 netdev_dbg(vif->dev, 1513 "Bad packet size: %d\n", txreq.size); 1514 xenvif_tx_err(vif, &txreq, idx); 1515 break; 1516 } 1517 1518 /* No crossing a page as the payload mustn't fragment. */ 1519 if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { 1520 netdev_err(vif->dev, 1521 "txreq.offset: %x, size: %u, end: %lu\n", 1522 txreq.offset, txreq.size, 1523 (txreq.offset&~PAGE_MASK) + txreq.size); 1524 xenvif_fatal_tx_err(vif); 1525 break; 1526 } 1527 1528 index = pending_index(vif->pending_cons); 1529 pending_idx = vif->pending_ring[index]; 1530 1531 data_len = (txreq.size > PKT_PROT_LEN && 1532 ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? 1533 PKT_PROT_LEN : txreq.size; 1534 1535 skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN, 1536 GFP_ATOMIC | __GFP_NOWARN); 1537 if (unlikely(skb == NULL)) { 1538 netdev_dbg(vif->dev, 1539 "Can't allocate a skb in start_xmit.\n"); 1540 xenvif_tx_err(vif, &txreq, idx); 1541 break; 1542 } 1543 1544 /* Packets passed to netif_rx() must have some headroom. */ 1545 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); 1546 1547 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { 1548 struct xen_netif_extra_info *gso; 1549 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; 1550 1551 if (xenvif_set_skb_gso(vif, skb, gso)) { 1552 /* Failure in xenvif_set_skb_gso is fatal. */ 1553 kfree_skb(skb); 1554 break; 1555 } 1556 } 1557 1558 /* XXX could copy straight to head */ 1559 page = xenvif_alloc_page(vif, pending_idx); 1560 if (!page) { 1561 kfree_skb(skb); 1562 xenvif_tx_err(vif, &txreq, idx); 1563 break; 1564 } 1565 1566 gop->source.u.ref = txreq.gref; 1567 gop->source.domid = vif->domid; 1568 gop->source.offset = txreq.offset; 1569 1570 gop->dest.u.gmfn = virt_to_mfn(page_address(page)); 1571 gop->dest.domid = DOMID_SELF; 1572 gop->dest.offset = txreq.offset; 1573 1574 gop->len = txreq.size; 1575 gop->flags = GNTCOPY_source_gref; 1576 1577 gop++; 1578 1579 memcpy(&vif->pending_tx_info[pending_idx].req, 1580 &txreq, sizeof(txreq)); 1581 vif->pending_tx_info[pending_idx].head = index; 1582 *((u16 *)skb->data) = pending_idx; 1583 1584 __skb_put(skb, data_len); 1585 1586 skb_shinfo(skb)->nr_frags = ret; 1587 if (data_len < txreq.size) { 1588 skb_shinfo(skb)->nr_frags++; 1589 frag_set_pending_idx(&skb_shinfo(skb)->frags[0], 1590 pending_idx); 1591 } else { 1592 frag_set_pending_idx(&skb_shinfo(skb)->frags[0], 1593 INVALID_PENDING_IDX); 1594 } 1595 1596 vif->pending_cons++; 1597 1598 request_gop = xenvif_get_requests(vif, skb, txfrags, gop); 1599 if (request_gop == NULL) { 1600 kfree_skb(skb); 1601 xenvif_tx_err(vif, &txreq, idx); 1602 break; 1603 } 1604 gop = request_gop; 1605 1606 __skb_queue_tail(&vif->tx_queue, skb); 1607 1608 vif->tx.req_cons = idx; 1609 1610 if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops)) 1611 break; 1612 } 1613 1614 return gop - vif->tx_copy_ops; 1615 } 1616 1617 1618 static int xenvif_tx_submit(struct xenvif *vif) 1619 { 1620 struct gnttab_copy *gop = vif->tx_copy_ops; 1621 struct sk_buff *skb; 1622 int work_done = 0; 1623 1624 while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) { 1625 struct xen_netif_tx_request *txp; 1626 u16 pending_idx; 1627 unsigned data_len; 1628 1629 pending_idx = *((u16 *)skb->data); 1630 txp = &vif->pending_tx_info[pending_idx].req; 1631 1632 /* Check the remap error code. */ 1633 if (unlikely(xenvif_tx_check_gop(vif, skb, &gop))) { 1634 netdev_dbg(vif->dev, "netback grant failed.\n"); 1635 skb_shinfo(skb)->nr_frags = 0; 1636 kfree_skb(skb); 1637 continue; 1638 } 1639 1640 data_len = skb->len; 1641 memcpy(skb->data, 1642 (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset), 1643 data_len); 1644 if (data_len < txp->size) { 1645 /* Append the packet payload as a fragment. */ 1646 txp->offset += data_len; 1647 txp->size -= data_len; 1648 } else { 1649 /* Schedule a response immediately. */ 1650 xenvif_idx_release(vif, pending_idx, 1651 XEN_NETIF_RSP_OKAY); 1652 } 1653 1654 if (txp->flags & XEN_NETTXF_csum_blank) 1655 skb->ip_summed = CHECKSUM_PARTIAL; 1656 else if (txp->flags & XEN_NETTXF_data_validated) 1657 skb->ip_summed = CHECKSUM_UNNECESSARY; 1658 1659 xenvif_fill_frags(vif, skb); 1660 1661 if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) { 1662 int target = min_t(int, skb->len, PKT_PROT_LEN); 1663 __pskb_pull_tail(skb, target - skb_headlen(skb)); 1664 } 1665 1666 skb->dev = vif->dev; 1667 skb->protocol = eth_type_trans(skb, skb->dev); 1668 skb_reset_network_header(skb); 1669 1670 if (checksum_setup(vif, skb)) { 1671 netdev_dbg(vif->dev, 1672 "Can't setup checksum in net_tx_action\n"); 1673 kfree_skb(skb); 1674 continue; 1675 } 1676 1677 skb_probe_transport_header(skb, 0); 1678 1679 vif->dev->stats.rx_bytes += skb->len; 1680 vif->dev->stats.rx_packets++; 1681 1682 work_done++; 1683 1684 netif_receive_skb(skb); 1685 } 1686 1687 return work_done; 1688 } 1689 1690 /* Called after netfront has transmitted */ 1691 int xenvif_tx_action(struct xenvif *vif, int budget) 1692 { 1693 unsigned nr_gops; 1694 int work_done; 1695 1696 if (unlikely(!tx_work_todo(vif))) 1697 return 0; 1698 1699 nr_gops = xenvif_tx_build_gops(vif, budget); 1700 1701 if (nr_gops == 0) 1702 return 0; 1703 1704 gnttab_batch_copy(vif->tx_copy_ops, nr_gops); 1705 1706 work_done = xenvif_tx_submit(vif); 1707 1708 return work_done; 1709 } 1710 1711 static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx, 1712 u8 status) 1713 { 1714 struct pending_tx_info *pending_tx_info; 1715 pending_ring_idx_t head; 1716 u16 peek; /* peek into next tx request */ 1717 1718 BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL)); 1719 1720 /* Already complete? */ 1721 if (vif->mmap_pages[pending_idx] == NULL) 1722 return; 1723 1724 pending_tx_info = &vif->pending_tx_info[pending_idx]; 1725 1726 head = pending_tx_info->head; 1727 1728 BUG_ON(!pending_tx_is_head(vif, head)); 1729 BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx); 1730 1731 do { 1732 pending_ring_idx_t index; 1733 pending_ring_idx_t idx = pending_index(head); 1734 u16 info_idx = vif->pending_ring[idx]; 1735 1736 pending_tx_info = &vif->pending_tx_info[info_idx]; 1737 make_tx_response(vif, &pending_tx_info->req, status); 1738 1739 /* Setting any number other than 1740 * INVALID_PENDING_RING_IDX indicates this slot is 1741 * starting a new packet / ending a previous packet. 1742 */ 1743 pending_tx_info->head = 0; 1744 1745 index = pending_index(vif->pending_prod++); 1746 vif->pending_ring[index] = vif->pending_ring[info_idx]; 1747 1748 peek = vif->pending_ring[pending_index(++head)]; 1749 1750 } while (!pending_tx_is_head(vif, peek)); 1751 1752 put_page(vif->mmap_pages[pending_idx]); 1753 vif->mmap_pages[pending_idx] = NULL; 1754 } 1755 1756 1757 static void make_tx_response(struct xenvif *vif, 1758 struct xen_netif_tx_request *txp, 1759 s8 st) 1760 { 1761 RING_IDX i = vif->tx.rsp_prod_pvt; 1762 struct xen_netif_tx_response *resp; 1763 int notify; 1764 1765 resp = RING_GET_RESPONSE(&vif->tx, i); 1766 resp->id = txp->id; 1767 resp->status = st; 1768 1769 if (txp->flags & XEN_NETTXF_extra_info) 1770 RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL; 1771 1772 vif->tx.rsp_prod_pvt = ++i; 1773 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify); 1774 if (notify) 1775 notify_remote_via_irq(vif->tx_irq); 1776 } 1777 1778 static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif, 1779 u16 id, 1780 s8 st, 1781 u16 offset, 1782 u16 size, 1783 u16 flags) 1784 { 1785 RING_IDX i = vif->rx.rsp_prod_pvt; 1786 struct xen_netif_rx_response *resp; 1787 1788 resp = RING_GET_RESPONSE(&vif->rx, i); 1789 resp->offset = offset; 1790 resp->flags = flags; 1791 resp->id = id; 1792 resp->status = (s16)size; 1793 if (st < 0) 1794 resp->status = (s16)st; 1795 1796 vif->rx.rsp_prod_pvt = ++i; 1797 1798 return resp; 1799 } 1800 1801 static inline int rx_work_todo(struct xenvif *vif) 1802 { 1803 return !skb_queue_empty(&vif->rx_queue); 1804 } 1805 1806 static inline int tx_work_todo(struct xenvif *vif) 1807 { 1808 1809 if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) && 1810 (nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX 1811 < MAX_PENDING_REQS)) 1812 return 1; 1813 1814 return 0; 1815 } 1816 1817 void xenvif_unmap_frontend_rings(struct xenvif *vif) 1818 { 1819 if (vif->tx.sring) 1820 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif), 1821 vif->tx.sring); 1822 if (vif->rx.sring) 1823 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif), 1824 vif->rx.sring); 1825 } 1826 1827 int xenvif_map_frontend_rings(struct xenvif *vif, 1828 grant_ref_t tx_ring_ref, 1829 grant_ref_t rx_ring_ref) 1830 { 1831 void *addr; 1832 struct xen_netif_tx_sring *txs; 1833 struct xen_netif_rx_sring *rxs; 1834 1835 int err = -ENOMEM; 1836 1837 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), 1838 tx_ring_ref, &addr); 1839 if (err) 1840 goto err; 1841 1842 txs = (struct xen_netif_tx_sring *)addr; 1843 BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE); 1844 1845 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), 1846 rx_ring_ref, &addr); 1847 if (err) 1848 goto err; 1849 1850 rxs = (struct xen_netif_rx_sring *)addr; 1851 BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE); 1852 1853 vif->rx_req_cons_peek = 0; 1854 1855 return 0; 1856 1857 err: 1858 xenvif_unmap_frontend_rings(vif); 1859 return err; 1860 } 1861 1862 int xenvif_kthread(void *data) 1863 { 1864 struct xenvif *vif = data; 1865 1866 while (!kthread_should_stop()) { 1867 wait_event_interruptible(vif->wq, 1868 rx_work_todo(vif) || 1869 kthread_should_stop()); 1870 if (kthread_should_stop()) 1871 break; 1872 1873 if (rx_work_todo(vif)) 1874 xenvif_rx_action(vif); 1875 1876 cond_resched(); 1877 } 1878 1879 return 0; 1880 } 1881 1882 static int __init netback_init(void) 1883 { 1884 int rc = 0; 1885 1886 if (!xen_domain()) 1887 return -ENODEV; 1888 1889 if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) { 1890 pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", 1891 fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX); 1892 fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX; 1893 } 1894 1895 rc = xenvif_xenbus_init(); 1896 if (rc) 1897 goto failed_init; 1898 1899 return 0; 1900 1901 failed_init: 1902 return rc; 1903 } 1904 1905 module_init(netback_init); 1906 1907 static void __exit netback_fini(void) 1908 { 1909 xenvif_xenbus_fini(); 1910 } 1911 module_exit(netback_fini); 1912 1913 MODULE_LICENSE("Dual BSD/GPL"); 1914 MODULE_ALIAS("xen-backend:vif"); 1915