1 /* 2 * Back-end of the driver for virtual network devices. This portion of the 3 * driver exports a 'unified' network-device interface that can be accessed 4 * by any operating system that implements a compatible front end. A 5 * reference front-end implementation can be found in: 6 * drivers/net/xen-netfront.c 7 * 8 * Copyright (c) 2002-2005, K A Fraser 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License version 2 12 * as published by the Free Software Foundation; or, when distributed 13 * separately from the Linux kernel or incorporated into other 14 * software packages, subject to the following license: 15 * 16 * Permission is hereby granted, free of charge, to any person obtaining a copy 17 * of this source file (the "Software"), to deal in the Software without 18 * restriction, including without limitation the rights to use, copy, modify, 19 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 20 * and to permit persons to whom the Software is furnished to do so, subject to 21 * the following conditions: 22 * 23 * The above copyright notice and this permission notice shall be included in 24 * all copies or substantial portions of the Software. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 32 * IN THE SOFTWARE. 33 */ 34 35 #include "common.h" 36 37 #include <linux/kthread.h> 38 #include <linux/if_vlan.h> 39 #include <linux/udp.h> 40 #include <linux/highmem.h> 41 42 #include <net/tcp.h> 43 44 #include <xen/xen.h> 45 #include <xen/events.h> 46 #include <xen/interface/memory.h> 47 #include <xen/page.h> 48 49 #include <asm/xen/hypercall.h> 50 51 /* Provide an option to disable split event channels at load time as 52 * event channels are limited resource. Split event channels are 53 * enabled by default. 54 */ 55 bool separate_tx_rx_irq = true; 56 module_param(separate_tx_rx_irq, bool, 0644); 57 58 /* The time that packets can stay on the guest Rx internal queue 59 * before they are dropped. 60 */ 61 unsigned int rx_drain_timeout_msecs = 10000; 62 module_param(rx_drain_timeout_msecs, uint, 0444); 63 64 /* The length of time before the frontend is considered unresponsive 65 * because it isn't providing Rx slots. 66 */ 67 unsigned int rx_stall_timeout_msecs = 60000; 68 module_param(rx_stall_timeout_msecs, uint, 0444); 69 70 unsigned int xenvif_max_queues; 71 module_param_named(max_queues, xenvif_max_queues, uint, 0644); 72 MODULE_PARM_DESC(max_queues, 73 "Maximum number of queues per virtual interface"); 74 75 /* 76 * This is the maximum slots a skb can have. If a guest sends a skb 77 * which exceeds this limit it is considered malicious. 78 */ 79 #define FATAL_SKB_SLOTS_DEFAULT 20 80 static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT; 81 module_param(fatal_skb_slots, uint, 0444); 82 83 /* The amount to copy out of the first guest Tx slot into the skb's 84 * linear area. If the first slot has more data, it will be mapped 85 * and put into the first frag. 86 * 87 * This is sized to avoid pulling headers from the frags for most 88 * TCP/IP packets. 89 */ 90 #define XEN_NETBACK_TX_COPY_LEN 128 91 92 /* This is the maximum number of flows in the hash cache. */ 93 #define XENVIF_HASH_CACHE_SIZE_DEFAULT 64 94 unsigned int xenvif_hash_cache_size = XENVIF_HASH_CACHE_SIZE_DEFAULT; 95 module_param_named(hash_cache_size, xenvif_hash_cache_size, uint, 0644); 96 MODULE_PARM_DESC(hash_cache_size, "Number of flows in the hash cache"); 97 98 static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, 99 u8 status); 100 101 static void make_tx_response(struct xenvif_queue *queue, 102 struct xen_netif_tx_request *txp, 103 unsigned int extra_count, 104 s8 st); 105 static void push_tx_responses(struct xenvif_queue *queue); 106 107 static inline int tx_work_todo(struct xenvif_queue *queue); 108 109 static inline unsigned long idx_to_pfn(struct xenvif_queue *queue, 110 u16 idx) 111 { 112 return page_to_pfn(queue->mmap_pages[idx]); 113 } 114 115 static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue, 116 u16 idx) 117 { 118 return (unsigned long)pfn_to_kaddr(idx_to_pfn(queue, idx)); 119 } 120 121 #define callback_param(vif, pending_idx) \ 122 (vif->pending_tx_info[pending_idx].callback_struct) 123 124 /* Find the containing VIF's structure from a pointer in pending_tx_info array 125 */ 126 static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf) 127 { 128 u16 pending_idx = ubuf->desc; 129 struct pending_tx_info *temp = 130 container_of(ubuf, struct pending_tx_info, callback_struct); 131 return container_of(temp - pending_idx, 132 struct xenvif_queue, 133 pending_tx_info[0]); 134 } 135 136 static u16 frag_get_pending_idx(skb_frag_t *frag) 137 { 138 return (u16)frag->page_offset; 139 } 140 141 static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx) 142 { 143 frag->page_offset = pending_idx; 144 } 145 146 static inline pending_ring_idx_t pending_index(unsigned i) 147 { 148 return i & (MAX_PENDING_REQS-1); 149 } 150 151 void xenvif_kick_thread(struct xenvif_queue *queue) 152 { 153 wake_up(&queue->wq); 154 } 155 156 void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue) 157 { 158 int more_to_do; 159 160 RING_FINAL_CHECK_FOR_REQUESTS(&queue->tx, more_to_do); 161 162 if (more_to_do) 163 napi_schedule(&queue->napi); 164 } 165 166 static void tx_add_credit(struct xenvif_queue *queue) 167 { 168 unsigned long max_burst, max_credit; 169 170 /* 171 * Allow a burst big enough to transmit a jumbo packet of up to 128kB. 172 * Otherwise the interface can seize up due to insufficient credit. 173 */ 174 max_burst = max(131072UL, queue->credit_bytes); 175 176 /* Take care that adding a new chunk of credit doesn't wrap to zero. */ 177 max_credit = queue->remaining_credit + queue->credit_bytes; 178 if (max_credit < queue->remaining_credit) 179 max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ 180 181 queue->remaining_credit = min(max_credit, max_burst); 182 } 183 184 void xenvif_tx_credit_callback(unsigned long data) 185 { 186 struct xenvif_queue *queue = (struct xenvif_queue *)data; 187 tx_add_credit(queue); 188 xenvif_napi_schedule_or_enable_events(queue); 189 } 190 191 static void xenvif_tx_err(struct xenvif_queue *queue, 192 struct xen_netif_tx_request *txp, 193 unsigned int extra_count, RING_IDX end) 194 { 195 RING_IDX cons = queue->tx.req_cons; 196 unsigned long flags; 197 198 do { 199 spin_lock_irqsave(&queue->response_lock, flags); 200 make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR); 201 push_tx_responses(queue); 202 spin_unlock_irqrestore(&queue->response_lock, flags); 203 if (cons == end) 204 break; 205 RING_COPY_REQUEST(&queue->tx, cons++, txp); 206 extra_count = 0; /* only the first frag can have extras */ 207 } while (1); 208 queue->tx.req_cons = cons; 209 } 210 211 static void xenvif_fatal_tx_err(struct xenvif *vif) 212 { 213 netdev_err(vif->dev, "fatal error; disabling device\n"); 214 vif->disabled = true; 215 /* Disable the vif from queue 0's kthread */ 216 if (vif->queues) 217 xenvif_kick_thread(&vif->queues[0]); 218 } 219 220 static int xenvif_count_requests(struct xenvif_queue *queue, 221 struct xen_netif_tx_request *first, 222 unsigned int extra_count, 223 struct xen_netif_tx_request *txp, 224 int work_to_do) 225 { 226 RING_IDX cons = queue->tx.req_cons; 227 int slots = 0; 228 int drop_err = 0; 229 int more_data; 230 231 if (!(first->flags & XEN_NETTXF_more_data)) 232 return 0; 233 234 do { 235 struct xen_netif_tx_request dropped_tx = { 0 }; 236 237 if (slots >= work_to_do) { 238 netdev_err(queue->vif->dev, 239 "Asked for %d slots but exceeds this limit\n", 240 work_to_do); 241 xenvif_fatal_tx_err(queue->vif); 242 return -ENODATA; 243 } 244 245 /* This guest is really using too many slots and 246 * considered malicious. 247 */ 248 if (unlikely(slots >= fatal_skb_slots)) { 249 netdev_err(queue->vif->dev, 250 "Malicious frontend using %d slots, threshold %u\n", 251 slots, fatal_skb_slots); 252 xenvif_fatal_tx_err(queue->vif); 253 return -E2BIG; 254 } 255 256 /* Xen network protocol had implicit dependency on 257 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to 258 * the historical MAX_SKB_FRAGS value 18 to honor the 259 * same behavior as before. Any packet using more than 260 * 18 slots but less than fatal_skb_slots slots is 261 * dropped 262 */ 263 if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) { 264 if (net_ratelimit()) 265 netdev_dbg(queue->vif->dev, 266 "Too many slots (%d) exceeding limit (%d), dropping packet\n", 267 slots, XEN_NETBK_LEGACY_SLOTS_MAX); 268 drop_err = -E2BIG; 269 } 270 271 if (drop_err) 272 txp = &dropped_tx; 273 274 RING_COPY_REQUEST(&queue->tx, cons + slots, txp); 275 276 /* If the guest submitted a frame >= 64 KiB then 277 * first->size overflowed and following slots will 278 * appear to be larger than the frame. 279 * 280 * This cannot be fatal error as there are buggy 281 * frontends that do this. 282 * 283 * Consume all slots and drop the packet. 284 */ 285 if (!drop_err && txp->size > first->size) { 286 if (net_ratelimit()) 287 netdev_dbg(queue->vif->dev, 288 "Invalid tx request, slot size %u > remaining size %u\n", 289 txp->size, first->size); 290 drop_err = -EIO; 291 } 292 293 first->size -= txp->size; 294 slots++; 295 296 if (unlikely((txp->offset + txp->size) > XEN_PAGE_SIZE)) { 297 netdev_err(queue->vif->dev, "Cross page boundary, txp->offset: %u, size: %u\n", 298 txp->offset, txp->size); 299 xenvif_fatal_tx_err(queue->vif); 300 return -EINVAL; 301 } 302 303 more_data = txp->flags & XEN_NETTXF_more_data; 304 305 if (!drop_err) 306 txp++; 307 308 } while (more_data); 309 310 if (drop_err) { 311 xenvif_tx_err(queue, first, extra_count, cons + slots); 312 return drop_err; 313 } 314 315 return slots; 316 } 317 318 319 struct xenvif_tx_cb { 320 u16 pending_idx; 321 }; 322 323 #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) 324 325 static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue, 326 u16 pending_idx, 327 struct xen_netif_tx_request *txp, 328 unsigned int extra_count, 329 struct gnttab_map_grant_ref *mop) 330 { 331 queue->pages_to_map[mop-queue->tx_map_ops] = queue->mmap_pages[pending_idx]; 332 gnttab_set_map_op(mop, idx_to_kaddr(queue, pending_idx), 333 GNTMAP_host_map | GNTMAP_readonly, 334 txp->gref, queue->vif->domid); 335 336 memcpy(&queue->pending_tx_info[pending_idx].req, txp, 337 sizeof(*txp)); 338 queue->pending_tx_info[pending_idx].extra_count = extra_count; 339 } 340 341 static inline struct sk_buff *xenvif_alloc_skb(unsigned int size) 342 { 343 struct sk_buff *skb = 344 alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN, 345 GFP_ATOMIC | __GFP_NOWARN); 346 if (unlikely(skb == NULL)) 347 return NULL; 348 349 /* Packets passed to netif_rx() must have some headroom. */ 350 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); 351 352 /* Initialize it here to avoid later surprises */ 353 skb_shinfo(skb)->destructor_arg = NULL; 354 355 return skb; 356 } 357 358 static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue, 359 struct sk_buff *skb, 360 struct xen_netif_tx_request *txp, 361 struct gnttab_map_grant_ref *gop, 362 unsigned int frag_overflow, 363 struct sk_buff *nskb) 364 { 365 struct skb_shared_info *shinfo = skb_shinfo(skb); 366 skb_frag_t *frags = shinfo->frags; 367 u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; 368 int start; 369 pending_ring_idx_t index; 370 unsigned int nr_slots; 371 372 nr_slots = shinfo->nr_frags; 373 374 /* Skip first skb fragment if it is on same page as header fragment. */ 375 start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); 376 377 for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots; 378 shinfo->nr_frags++, txp++, gop++) { 379 index = pending_index(queue->pending_cons++); 380 pending_idx = queue->pending_ring[index]; 381 xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop); 382 frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); 383 } 384 385 if (frag_overflow) { 386 387 shinfo = skb_shinfo(nskb); 388 frags = shinfo->frags; 389 390 for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow; 391 shinfo->nr_frags++, txp++, gop++) { 392 index = pending_index(queue->pending_cons++); 393 pending_idx = queue->pending_ring[index]; 394 xenvif_tx_create_map_op(queue, pending_idx, txp, 0, 395 gop); 396 frag_set_pending_idx(&frags[shinfo->nr_frags], 397 pending_idx); 398 } 399 400 skb_shinfo(skb)->frag_list = nskb; 401 } 402 403 return gop; 404 } 405 406 static inline void xenvif_grant_handle_set(struct xenvif_queue *queue, 407 u16 pending_idx, 408 grant_handle_t handle) 409 { 410 if (unlikely(queue->grant_tx_handle[pending_idx] != 411 NETBACK_INVALID_HANDLE)) { 412 netdev_err(queue->vif->dev, 413 "Trying to overwrite active handle! pending_idx: 0x%x\n", 414 pending_idx); 415 BUG(); 416 } 417 queue->grant_tx_handle[pending_idx] = handle; 418 } 419 420 static inline void xenvif_grant_handle_reset(struct xenvif_queue *queue, 421 u16 pending_idx) 422 { 423 if (unlikely(queue->grant_tx_handle[pending_idx] == 424 NETBACK_INVALID_HANDLE)) { 425 netdev_err(queue->vif->dev, 426 "Trying to unmap invalid handle! pending_idx: 0x%x\n", 427 pending_idx); 428 BUG(); 429 } 430 queue->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE; 431 } 432 433 static int xenvif_tx_check_gop(struct xenvif_queue *queue, 434 struct sk_buff *skb, 435 struct gnttab_map_grant_ref **gopp_map, 436 struct gnttab_copy **gopp_copy) 437 { 438 struct gnttab_map_grant_ref *gop_map = *gopp_map; 439 u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; 440 /* This always points to the shinfo of the skb being checked, which 441 * could be either the first or the one on the frag_list 442 */ 443 struct skb_shared_info *shinfo = skb_shinfo(skb); 444 /* If this is non-NULL, we are currently checking the frag_list skb, and 445 * this points to the shinfo of the first one 446 */ 447 struct skb_shared_info *first_shinfo = NULL; 448 int nr_frags = shinfo->nr_frags; 449 const bool sharedslot = nr_frags && 450 frag_get_pending_idx(&shinfo->frags[0]) == pending_idx; 451 int i, err; 452 453 /* Check status of header. */ 454 err = (*gopp_copy)->status; 455 if (unlikely(err)) { 456 if (net_ratelimit()) 457 netdev_dbg(queue->vif->dev, 458 "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", 459 (*gopp_copy)->status, 460 pending_idx, 461 (*gopp_copy)->source.u.ref); 462 /* The first frag might still have this slot mapped */ 463 if (!sharedslot) 464 xenvif_idx_release(queue, pending_idx, 465 XEN_NETIF_RSP_ERROR); 466 } 467 (*gopp_copy)++; 468 469 check_frags: 470 for (i = 0; i < nr_frags; i++, gop_map++) { 471 int j, newerr; 472 473 pending_idx = frag_get_pending_idx(&shinfo->frags[i]); 474 475 /* Check error status: if okay then remember grant handle. */ 476 newerr = gop_map->status; 477 478 if (likely(!newerr)) { 479 xenvif_grant_handle_set(queue, 480 pending_idx, 481 gop_map->handle); 482 /* Had a previous error? Invalidate this fragment. */ 483 if (unlikely(err)) { 484 xenvif_idx_unmap(queue, pending_idx); 485 /* If the mapping of the first frag was OK, but 486 * the header's copy failed, and they are 487 * sharing a slot, send an error 488 */ 489 if (i == 0 && sharedslot) 490 xenvif_idx_release(queue, pending_idx, 491 XEN_NETIF_RSP_ERROR); 492 else 493 xenvif_idx_release(queue, pending_idx, 494 XEN_NETIF_RSP_OKAY); 495 } 496 continue; 497 } 498 499 /* Error on this fragment: respond to client with an error. */ 500 if (net_ratelimit()) 501 netdev_dbg(queue->vif->dev, 502 "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n", 503 i, 504 gop_map->status, 505 pending_idx, 506 gop_map->ref); 507 508 xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR); 509 510 /* Not the first error? Preceding frags already invalidated. */ 511 if (err) 512 continue; 513 514 /* First error: if the header haven't shared a slot with the 515 * first frag, release it as well. 516 */ 517 if (!sharedslot) 518 xenvif_idx_release(queue, 519 XENVIF_TX_CB(skb)->pending_idx, 520 XEN_NETIF_RSP_OKAY); 521 522 /* Invalidate preceding fragments of this skb. */ 523 for (j = 0; j < i; j++) { 524 pending_idx = frag_get_pending_idx(&shinfo->frags[j]); 525 xenvif_idx_unmap(queue, pending_idx); 526 xenvif_idx_release(queue, pending_idx, 527 XEN_NETIF_RSP_OKAY); 528 } 529 530 /* And if we found the error while checking the frag_list, unmap 531 * the first skb's frags 532 */ 533 if (first_shinfo) { 534 for (j = 0; j < first_shinfo->nr_frags; j++) { 535 pending_idx = frag_get_pending_idx(&first_shinfo->frags[j]); 536 xenvif_idx_unmap(queue, pending_idx); 537 xenvif_idx_release(queue, pending_idx, 538 XEN_NETIF_RSP_OKAY); 539 } 540 } 541 542 /* Remember the error: invalidate all subsequent fragments. */ 543 err = newerr; 544 } 545 546 if (skb_has_frag_list(skb) && !first_shinfo) { 547 first_shinfo = skb_shinfo(skb); 548 shinfo = skb_shinfo(skb_shinfo(skb)->frag_list); 549 nr_frags = shinfo->nr_frags; 550 551 goto check_frags; 552 } 553 554 *gopp_map = gop_map; 555 return err; 556 } 557 558 static void xenvif_fill_frags(struct xenvif_queue *queue, struct sk_buff *skb) 559 { 560 struct skb_shared_info *shinfo = skb_shinfo(skb); 561 int nr_frags = shinfo->nr_frags; 562 int i; 563 u16 prev_pending_idx = INVALID_PENDING_IDX; 564 565 for (i = 0; i < nr_frags; i++) { 566 skb_frag_t *frag = shinfo->frags + i; 567 struct xen_netif_tx_request *txp; 568 struct page *page; 569 u16 pending_idx; 570 571 pending_idx = frag_get_pending_idx(frag); 572 573 /* If this is not the first frag, chain it to the previous*/ 574 if (prev_pending_idx == INVALID_PENDING_IDX) 575 skb_shinfo(skb)->destructor_arg = 576 &callback_param(queue, pending_idx); 577 else 578 callback_param(queue, prev_pending_idx).ctx = 579 &callback_param(queue, pending_idx); 580 581 callback_param(queue, pending_idx).ctx = NULL; 582 prev_pending_idx = pending_idx; 583 584 txp = &queue->pending_tx_info[pending_idx].req; 585 page = virt_to_page(idx_to_kaddr(queue, pending_idx)); 586 __skb_fill_page_desc(skb, i, page, txp->offset, txp->size); 587 skb->len += txp->size; 588 skb->data_len += txp->size; 589 skb->truesize += txp->size; 590 591 /* Take an extra reference to offset network stack's put_page */ 592 get_page(queue->mmap_pages[pending_idx]); 593 } 594 } 595 596 static int xenvif_get_extras(struct xenvif_queue *queue, 597 struct xen_netif_extra_info *extras, 598 unsigned int *extra_count, 599 int work_to_do) 600 { 601 struct xen_netif_extra_info extra; 602 RING_IDX cons = queue->tx.req_cons; 603 604 do { 605 if (unlikely(work_to_do-- <= 0)) { 606 netdev_err(queue->vif->dev, "Missing extra info\n"); 607 xenvif_fatal_tx_err(queue->vif); 608 return -EBADR; 609 } 610 611 RING_COPY_REQUEST(&queue->tx, cons, &extra); 612 613 queue->tx.req_cons = ++cons; 614 (*extra_count)++; 615 616 if (unlikely(!extra.type || 617 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { 618 netdev_err(queue->vif->dev, 619 "Invalid extra type: %d\n", extra.type); 620 xenvif_fatal_tx_err(queue->vif); 621 return -EINVAL; 622 } 623 624 memcpy(&extras[extra.type - 1], &extra, sizeof(extra)); 625 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); 626 627 return work_to_do; 628 } 629 630 static int xenvif_set_skb_gso(struct xenvif *vif, 631 struct sk_buff *skb, 632 struct xen_netif_extra_info *gso) 633 { 634 if (!gso->u.gso.size) { 635 netdev_err(vif->dev, "GSO size must not be zero.\n"); 636 xenvif_fatal_tx_err(vif); 637 return -EINVAL; 638 } 639 640 switch (gso->u.gso.type) { 641 case XEN_NETIF_GSO_TYPE_TCPV4: 642 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; 643 break; 644 case XEN_NETIF_GSO_TYPE_TCPV6: 645 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; 646 break; 647 default: 648 netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type); 649 xenvif_fatal_tx_err(vif); 650 return -EINVAL; 651 } 652 653 skb_shinfo(skb)->gso_size = gso->u.gso.size; 654 /* gso_segs will be calculated later */ 655 656 return 0; 657 } 658 659 static int checksum_setup(struct xenvif_queue *queue, struct sk_buff *skb) 660 { 661 bool recalculate_partial_csum = false; 662 663 /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy 664 * peers can fail to set NETRXF_csum_blank when sending a GSO 665 * frame. In this case force the SKB to CHECKSUM_PARTIAL and 666 * recalculate the partial checksum. 667 */ 668 if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { 669 queue->stats.rx_gso_checksum_fixup++; 670 skb->ip_summed = CHECKSUM_PARTIAL; 671 recalculate_partial_csum = true; 672 } 673 674 /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ 675 if (skb->ip_summed != CHECKSUM_PARTIAL) 676 return 0; 677 678 return skb_checksum_setup(skb, recalculate_partial_csum); 679 } 680 681 static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size) 682 { 683 u64 now = get_jiffies_64(); 684 u64 next_credit = queue->credit_window_start + 685 msecs_to_jiffies(queue->credit_usec / 1000); 686 687 /* Timer could already be pending in rare cases. */ 688 if (timer_pending(&queue->credit_timeout)) 689 return true; 690 691 /* Passed the point where we can replenish credit? */ 692 if (time_after_eq64(now, next_credit)) { 693 queue->credit_window_start = now; 694 tx_add_credit(queue); 695 } 696 697 /* Still too big to send right now? Set a callback. */ 698 if (size > queue->remaining_credit) { 699 queue->credit_timeout.data = 700 (unsigned long)queue; 701 mod_timer(&queue->credit_timeout, 702 next_credit); 703 queue->credit_window_start = next_credit; 704 705 return true; 706 } 707 708 return false; 709 } 710 711 /* No locking is required in xenvif_mcast_add/del() as they are 712 * only ever invoked from NAPI poll. An RCU list is used because 713 * xenvif_mcast_match() is called asynchronously, during start_xmit. 714 */ 715 716 static int xenvif_mcast_add(struct xenvif *vif, const u8 *addr) 717 { 718 struct xenvif_mcast_addr *mcast; 719 720 if (vif->fe_mcast_count == XEN_NETBK_MCAST_MAX) { 721 if (net_ratelimit()) 722 netdev_err(vif->dev, 723 "Too many multicast addresses\n"); 724 return -ENOSPC; 725 } 726 727 mcast = kzalloc(sizeof(*mcast), GFP_ATOMIC); 728 if (!mcast) 729 return -ENOMEM; 730 731 ether_addr_copy(mcast->addr, addr); 732 list_add_tail_rcu(&mcast->entry, &vif->fe_mcast_addr); 733 vif->fe_mcast_count++; 734 735 return 0; 736 } 737 738 static void xenvif_mcast_del(struct xenvif *vif, const u8 *addr) 739 { 740 struct xenvif_mcast_addr *mcast; 741 742 list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) { 743 if (ether_addr_equal(addr, mcast->addr)) { 744 --vif->fe_mcast_count; 745 list_del_rcu(&mcast->entry); 746 kfree_rcu(mcast, rcu); 747 break; 748 } 749 } 750 } 751 752 bool xenvif_mcast_match(struct xenvif *vif, const u8 *addr) 753 { 754 struct xenvif_mcast_addr *mcast; 755 756 rcu_read_lock(); 757 list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) { 758 if (ether_addr_equal(addr, mcast->addr)) { 759 rcu_read_unlock(); 760 return true; 761 } 762 } 763 rcu_read_unlock(); 764 765 return false; 766 } 767 768 void xenvif_mcast_addr_list_free(struct xenvif *vif) 769 { 770 /* No need for locking or RCU here. NAPI poll and TX queue 771 * are stopped. 772 */ 773 while (!list_empty(&vif->fe_mcast_addr)) { 774 struct xenvif_mcast_addr *mcast; 775 776 mcast = list_first_entry(&vif->fe_mcast_addr, 777 struct xenvif_mcast_addr, 778 entry); 779 --vif->fe_mcast_count; 780 list_del(&mcast->entry); 781 kfree(mcast); 782 } 783 } 784 785 static void xenvif_tx_build_gops(struct xenvif_queue *queue, 786 int budget, 787 unsigned *copy_ops, 788 unsigned *map_ops) 789 { 790 struct gnttab_map_grant_ref *gop = queue->tx_map_ops; 791 struct sk_buff *skb, *nskb; 792 int ret; 793 unsigned int frag_overflow; 794 795 while (skb_queue_len(&queue->tx_queue) < budget) { 796 struct xen_netif_tx_request txreq; 797 struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX]; 798 struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; 799 unsigned int extra_count; 800 u16 pending_idx; 801 RING_IDX idx; 802 int work_to_do; 803 unsigned int data_len; 804 pending_ring_idx_t index; 805 806 if (queue->tx.sring->req_prod - queue->tx.req_cons > 807 XEN_NETIF_TX_RING_SIZE) { 808 netdev_err(queue->vif->dev, 809 "Impossible number of requests. " 810 "req_prod %d, req_cons %d, size %ld\n", 811 queue->tx.sring->req_prod, queue->tx.req_cons, 812 XEN_NETIF_TX_RING_SIZE); 813 xenvif_fatal_tx_err(queue->vif); 814 break; 815 } 816 817 work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx); 818 if (!work_to_do) 819 break; 820 821 idx = queue->tx.req_cons; 822 rmb(); /* Ensure that we see the request before we copy it. */ 823 RING_COPY_REQUEST(&queue->tx, idx, &txreq); 824 825 /* Credit-based scheduling. */ 826 if (txreq.size > queue->remaining_credit && 827 tx_credit_exceeded(queue, txreq.size)) 828 break; 829 830 queue->remaining_credit -= txreq.size; 831 832 work_to_do--; 833 queue->tx.req_cons = ++idx; 834 835 memset(extras, 0, sizeof(extras)); 836 extra_count = 0; 837 if (txreq.flags & XEN_NETTXF_extra_info) { 838 work_to_do = xenvif_get_extras(queue, extras, 839 &extra_count, 840 work_to_do); 841 idx = queue->tx.req_cons; 842 if (unlikely(work_to_do < 0)) 843 break; 844 } 845 846 if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1].type) { 847 struct xen_netif_extra_info *extra; 848 849 extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1]; 850 ret = xenvif_mcast_add(queue->vif, extra->u.mcast.addr); 851 852 make_tx_response(queue, &txreq, extra_count, 853 (ret == 0) ? 854 XEN_NETIF_RSP_OKAY : 855 XEN_NETIF_RSP_ERROR); 856 push_tx_responses(queue); 857 continue; 858 } 859 860 if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1].type) { 861 struct xen_netif_extra_info *extra; 862 863 extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1]; 864 xenvif_mcast_del(queue->vif, extra->u.mcast.addr); 865 866 make_tx_response(queue, &txreq, extra_count, 867 XEN_NETIF_RSP_OKAY); 868 push_tx_responses(queue); 869 continue; 870 } 871 872 ret = xenvif_count_requests(queue, &txreq, extra_count, 873 txfrags, work_to_do); 874 if (unlikely(ret < 0)) 875 break; 876 877 idx += ret; 878 879 if (unlikely(txreq.size < ETH_HLEN)) { 880 netdev_dbg(queue->vif->dev, 881 "Bad packet size: %d\n", txreq.size); 882 xenvif_tx_err(queue, &txreq, extra_count, idx); 883 break; 884 } 885 886 /* No crossing a page as the payload mustn't fragment. */ 887 if (unlikely((txreq.offset + txreq.size) > XEN_PAGE_SIZE)) { 888 netdev_err(queue->vif->dev, 889 "txreq.offset: %u, size: %u, end: %lu\n", 890 txreq.offset, txreq.size, 891 (unsigned long)(txreq.offset&~XEN_PAGE_MASK) + txreq.size); 892 xenvif_fatal_tx_err(queue->vif); 893 break; 894 } 895 896 index = pending_index(queue->pending_cons); 897 pending_idx = queue->pending_ring[index]; 898 899 data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN && 900 ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? 901 XEN_NETBACK_TX_COPY_LEN : txreq.size; 902 903 skb = xenvif_alloc_skb(data_len); 904 if (unlikely(skb == NULL)) { 905 netdev_dbg(queue->vif->dev, 906 "Can't allocate a skb in start_xmit.\n"); 907 xenvif_tx_err(queue, &txreq, extra_count, idx); 908 break; 909 } 910 911 skb_shinfo(skb)->nr_frags = ret; 912 if (data_len < txreq.size) 913 skb_shinfo(skb)->nr_frags++; 914 /* At this point shinfo->nr_frags is in fact the number of 915 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. 916 */ 917 frag_overflow = 0; 918 nskb = NULL; 919 if (skb_shinfo(skb)->nr_frags > MAX_SKB_FRAGS) { 920 frag_overflow = skb_shinfo(skb)->nr_frags - MAX_SKB_FRAGS; 921 BUG_ON(frag_overflow > MAX_SKB_FRAGS); 922 skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS; 923 nskb = xenvif_alloc_skb(0); 924 if (unlikely(nskb == NULL)) { 925 kfree_skb(skb); 926 xenvif_tx_err(queue, &txreq, extra_count, idx); 927 if (net_ratelimit()) 928 netdev_err(queue->vif->dev, 929 "Can't allocate the frag_list skb.\n"); 930 break; 931 } 932 } 933 934 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { 935 struct xen_netif_extra_info *gso; 936 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; 937 938 if (xenvif_set_skb_gso(queue->vif, skb, gso)) { 939 /* Failure in xenvif_set_skb_gso is fatal. */ 940 kfree_skb(skb); 941 kfree_skb(nskb); 942 break; 943 } 944 } 945 946 if (extras[XEN_NETIF_EXTRA_TYPE_HASH - 1].type) { 947 struct xen_netif_extra_info *extra; 948 enum pkt_hash_types type = PKT_HASH_TYPE_NONE; 949 950 extra = &extras[XEN_NETIF_EXTRA_TYPE_HASH - 1]; 951 952 switch (extra->u.hash.type) { 953 case _XEN_NETIF_CTRL_HASH_TYPE_IPV4: 954 case _XEN_NETIF_CTRL_HASH_TYPE_IPV6: 955 type = PKT_HASH_TYPE_L3; 956 break; 957 958 case _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP: 959 case _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP: 960 type = PKT_HASH_TYPE_L4; 961 break; 962 963 default: 964 break; 965 } 966 967 if (type != PKT_HASH_TYPE_NONE) 968 skb_set_hash(skb, 969 *(u32 *)extra->u.hash.value, 970 type); 971 } 972 973 XENVIF_TX_CB(skb)->pending_idx = pending_idx; 974 975 __skb_put(skb, data_len); 976 queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref; 977 queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid; 978 queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset; 979 980 queue->tx_copy_ops[*copy_ops].dest.u.gmfn = 981 virt_to_gfn(skb->data); 982 queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF; 983 queue->tx_copy_ops[*copy_ops].dest.offset = 984 offset_in_page(skb->data) & ~XEN_PAGE_MASK; 985 986 queue->tx_copy_ops[*copy_ops].len = data_len; 987 queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref; 988 989 (*copy_ops)++; 990 991 if (data_len < txreq.size) { 992 frag_set_pending_idx(&skb_shinfo(skb)->frags[0], 993 pending_idx); 994 xenvif_tx_create_map_op(queue, pending_idx, &txreq, 995 extra_count, gop); 996 gop++; 997 } else { 998 frag_set_pending_idx(&skb_shinfo(skb)->frags[0], 999 INVALID_PENDING_IDX); 1000 memcpy(&queue->pending_tx_info[pending_idx].req, 1001 &txreq, sizeof(txreq)); 1002 queue->pending_tx_info[pending_idx].extra_count = 1003 extra_count; 1004 } 1005 1006 queue->pending_cons++; 1007 1008 gop = xenvif_get_requests(queue, skb, txfrags, gop, 1009 frag_overflow, nskb); 1010 1011 __skb_queue_tail(&queue->tx_queue, skb); 1012 1013 queue->tx.req_cons = idx; 1014 1015 if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) || 1016 (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops))) 1017 break; 1018 } 1019 1020 (*map_ops) = gop - queue->tx_map_ops; 1021 return; 1022 } 1023 1024 /* Consolidate skb with a frag_list into a brand new one with local pages on 1025 * frags. Returns 0 or -ENOMEM if can't allocate new pages. 1026 */ 1027 static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *skb) 1028 { 1029 unsigned int offset = skb_headlen(skb); 1030 skb_frag_t frags[MAX_SKB_FRAGS]; 1031 int i, f; 1032 struct ubuf_info *uarg; 1033 struct sk_buff *nskb = skb_shinfo(skb)->frag_list; 1034 1035 queue->stats.tx_zerocopy_sent += 2; 1036 queue->stats.tx_frag_overflow++; 1037 1038 xenvif_fill_frags(queue, nskb); 1039 /* Subtract frags size, we will correct it later */ 1040 skb->truesize -= skb->data_len; 1041 skb->len += nskb->len; 1042 skb->data_len += nskb->len; 1043 1044 /* create a brand new frags array and coalesce there */ 1045 for (i = 0; offset < skb->len; i++) { 1046 struct page *page; 1047 unsigned int len; 1048 1049 BUG_ON(i >= MAX_SKB_FRAGS); 1050 page = alloc_page(GFP_ATOMIC); 1051 if (!page) { 1052 int j; 1053 skb->truesize += skb->data_len; 1054 for (j = 0; j < i; j++) 1055 put_page(frags[j].page.p); 1056 return -ENOMEM; 1057 } 1058 1059 if (offset + PAGE_SIZE < skb->len) 1060 len = PAGE_SIZE; 1061 else 1062 len = skb->len - offset; 1063 if (skb_copy_bits(skb, offset, page_address(page), len)) 1064 BUG(); 1065 1066 offset += len; 1067 frags[i].page.p = page; 1068 frags[i].page_offset = 0; 1069 skb_frag_size_set(&frags[i], len); 1070 } 1071 1072 /* Copied all the bits from the frag list -- free it. */ 1073 skb_frag_list_init(skb); 1074 xenvif_skb_zerocopy_prepare(queue, nskb); 1075 kfree_skb(nskb); 1076 1077 /* Release all the original (foreign) frags. */ 1078 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) 1079 skb_frag_unref(skb, f); 1080 uarg = skb_shinfo(skb)->destructor_arg; 1081 /* increase inflight counter to offset decrement in callback */ 1082 atomic_inc(&queue->inflight_packets); 1083 uarg->callback(uarg, true); 1084 skb_shinfo(skb)->destructor_arg = NULL; 1085 1086 /* Fill the skb with the new (local) frags. */ 1087 memcpy(skb_shinfo(skb)->frags, frags, i * sizeof(skb_frag_t)); 1088 skb_shinfo(skb)->nr_frags = i; 1089 skb->truesize += i * PAGE_SIZE; 1090 1091 return 0; 1092 } 1093 1094 static int xenvif_tx_submit(struct xenvif_queue *queue) 1095 { 1096 struct gnttab_map_grant_ref *gop_map = queue->tx_map_ops; 1097 struct gnttab_copy *gop_copy = queue->tx_copy_ops; 1098 struct sk_buff *skb; 1099 int work_done = 0; 1100 1101 while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) { 1102 struct xen_netif_tx_request *txp; 1103 u16 pending_idx; 1104 unsigned data_len; 1105 1106 pending_idx = XENVIF_TX_CB(skb)->pending_idx; 1107 txp = &queue->pending_tx_info[pending_idx].req; 1108 1109 /* Check the remap error code. */ 1110 if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) { 1111 /* If there was an error, xenvif_tx_check_gop is 1112 * expected to release all the frags which were mapped, 1113 * so kfree_skb shouldn't do it again 1114 */ 1115 skb_shinfo(skb)->nr_frags = 0; 1116 if (skb_has_frag_list(skb)) { 1117 struct sk_buff *nskb = 1118 skb_shinfo(skb)->frag_list; 1119 skb_shinfo(nskb)->nr_frags = 0; 1120 } 1121 kfree_skb(skb); 1122 continue; 1123 } 1124 1125 data_len = skb->len; 1126 callback_param(queue, pending_idx).ctx = NULL; 1127 if (data_len < txp->size) { 1128 /* Append the packet payload as a fragment. */ 1129 txp->offset += data_len; 1130 txp->size -= data_len; 1131 } else { 1132 /* Schedule a response immediately. */ 1133 xenvif_idx_release(queue, pending_idx, 1134 XEN_NETIF_RSP_OKAY); 1135 } 1136 1137 if (txp->flags & XEN_NETTXF_csum_blank) 1138 skb->ip_summed = CHECKSUM_PARTIAL; 1139 else if (txp->flags & XEN_NETTXF_data_validated) 1140 skb->ip_summed = CHECKSUM_UNNECESSARY; 1141 1142 xenvif_fill_frags(queue, skb); 1143 1144 if (unlikely(skb_has_frag_list(skb))) { 1145 if (xenvif_handle_frag_list(queue, skb)) { 1146 if (net_ratelimit()) 1147 netdev_err(queue->vif->dev, 1148 "Not enough memory to consolidate frag_list!\n"); 1149 xenvif_skb_zerocopy_prepare(queue, skb); 1150 kfree_skb(skb); 1151 continue; 1152 } 1153 } 1154 1155 skb->dev = queue->vif->dev; 1156 skb->protocol = eth_type_trans(skb, skb->dev); 1157 skb_reset_network_header(skb); 1158 1159 if (checksum_setup(queue, skb)) { 1160 netdev_dbg(queue->vif->dev, 1161 "Can't setup checksum in net_tx_action\n"); 1162 /* We have to set this flag to trigger the callback */ 1163 if (skb_shinfo(skb)->destructor_arg) 1164 xenvif_skb_zerocopy_prepare(queue, skb); 1165 kfree_skb(skb); 1166 continue; 1167 } 1168 1169 skb_probe_transport_header(skb, 0); 1170 1171 /* If the packet is GSO then we will have just set up the 1172 * transport header offset in checksum_setup so it's now 1173 * straightforward to calculate gso_segs. 1174 */ 1175 if (skb_is_gso(skb)) { 1176 int mss = skb_shinfo(skb)->gso_size; 1177 int hdrlen = skb_transport_header(skb) - 1178 skb_mac_header(skb) + 1179 tcp_hdrlen(skb); 1180 1181 skb_shinfo(skb)->gso_segs = 1182 DIV_ROUND_UP(skb->len - hdrlen, mss); 1183 } 1184 1185 queue->stats.rx_bytes += skb->len; 1186 queue->stats.rx_packets++; 1187 1188 work_done++; 1189 1190 /* Set this flag right before netif_receive_skb, otherwise 1191 * someone might think this packet already left netback, and 1192 * do a skb_copy_ubufs while we are still in control of the 1193 * skb. E.g. the __pskb_pull_tail earlier can do such thing. 1194 */ 1195 if (skb_shinfo(skb)->destructor_arg) { 1196 xenvif_skb_zerocopy_prepare(queue, skb); 1197 queue->stats.tx_zerocopy_sent++; 1198 } 1199 1200 netif_receive_skb(skb); 1201 } 1202 1203 return work_done; 1204 } 1205 1206 void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success) 1207 { 1208 unsigned long flags; 1209 pending_ring_idx_t index; 1210 struct xenvif_queue *queue = ubuf_to_queue(ubuf); 1211 1212 /* This is the only place where we grab this lock, to protect callbacks 1213 * from each other. 1214 */ 1215 spin_lock_irqsave(&queue->callback_lock, flags); 1216 do { 1217 u16 pending_idx = ubuf->desc; 1218 ubuf = (struct ubuf_info *) ubuf->ctx; 1219 BUG_ON(queue->dealloc_prod - queue->dealloc_cons >= 1220 MAX_PENDING_REQS); 1221 index = pending_index(queue->dealloc_prod); 1222 queue->dealloc_ring[index] = pending_idx; 1223 /* Sync with xenvif_tx_dealloc_action: 1224 * insert idx then incr producer. 1225 */ 1226 smp_wmb(); 1227 queue->dealloc_prod++; 1228 } while (ubuf); 1229 spin_unlock_irqrestore(&queue->callback_lock, flags); 1230 1231 if (likely(zerocopy_success)) 1232 queue->stats.tx_zerocopy_success++; 1233 else 1234 queue->stats.tx_zerocopy_fail++; 1235 xenvif_skb_zerocopy_complete(queue); 1236 } 1237 1238 static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) 1239 { 1240 struct gnttab_unmap_grant_ref *gop; 1241 pending_ring_idx_t dc, dp; 1242 u16 pending_idx, pending_idx_release[MAX_PENDING_REQS]; 1243 unsigned int i = 0; 1244 1245 dc = queue->dealloc_cons; 1246 gop = queue->tx_unmap_ops; 1247 1248 /* Free up any grants we have finished using */ 1249 do { 1250 dp = queue->dealloc_prod; 1251 1252 /* Ensure we see all indices enqueued by all 1253 * xenvif_zerocopy_callback(). 1254 */ 1255 smp_rmb(); 1256 1257 while (dc != dp) { 1258 BUG_ON(gop - queue->tx_unmap_ops >= MAX_PENDING_REQS); 1259 pending_idx = 1260 queue->dealloc_ring[pending_index(dc++)]; 1261 1262 pending_idx_release[gop - queue->tx_unmap_ops] = 1263 pending_idx; 1264 queue->pages_to_unmap[gop - queue->tx_unmap_ops] = 1265 queue->mmap_pages[pending_idx]; 1266 gnttab_set_unmap_op(gop, 1267 idx_to_kaddr(queue, pending_idx), 1268 GNTMAP_host_map, 1269 queue->grant_tx_handle[pending_idx]); 1270 xenvif_grant_handle_reset(queue, pending_idx); 1271 ++gop; 1272 } 1273 1274 } while (dp != queue->dealloc_prod); 1275 1276 queue->dealloc_cons = dc; 1277 1278 if (gop - queue->tx_unmap_ops > 0) { 1279 int ret; 1280 ret = gnttab_unmap_refs(queue->tx_unmap_ops, 1281 NULL, 1282 queue->pages_to_unmap, 1283 gop - queue->tx_unmap_ops); 1284 if (ret) { 1285 netdev_err(queue->vif->dev, "Unmap fail: nr_ops %tu ret %d\n", 1286 gop - queue->tx_unmap_ops, ret); 1287 for (i = 0; i < gop - queue->tx_unmap_ops; ++i) { 1288 if (gop[i].status != GNTST_okay) 1289 netdev_err(queue->vif->dev, 1290 " host_addr: 0x%llx handle: 0x%x status: %d\n", 1291 gop[i].host_addr, 1292 gop[i].handle, 1293 gop[i].status); 1294 } 1295 BUG(); 1296 } 1297 } 1298 1299 for (i = 0; i < gop - queue->tx_unmap_ops; ++i) 1300 xenvif_idx_release(queue, pending_idx_release[i], 1301 XEN_NETIF_RSP_OKAY); 1302 } 1303 1304 1305 /* Called after netfront has transmitted */ 1306 int xenvif_tx_action(struct xenvif_queue *queue, int budget) 1307 { 1308 unsigned nr_mops, nr_cops = 0; 1309 int work_done, ret; 1310 1311 if (unlikely(!tx_work_todo(queue))) 1312 return 0; 1313 1314 xenvif_tx_build_gops(queue, budget, &nr_cops, &nr_mops); 1315 1316 if (nr_cops == 0) 1317 return 0; 1318 1319 gnttab_batch_copy(queue->tx_copy_ops, nr_cops); 1320 if (nr_mops != 0) { 1321 ret = gnttab_map_refs(queue->tx_map_ops, 1322 NULL, 1323 queue->pages_to_map, 1324 nr_mops); 1325 BUG_ON(ret); 1326 } 1327 1328 work_done = xenvif_tx_submit(queue); 1329 1330 return work_done; 1331 } 1332 1333 static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, 1334 u8 status) 1335 { 1336 struct pending_tx_info *pending_tx_info; 1337 pending_ring_idx_t index; 1338 unsigned long flags; 1339 1340 pending_tx_info = &queue->pending_tx_info[pending_idx]; 1341 1342 spin_lock_irqsave(&queue->response_lock, flags); 1343 1344 make_tx_response(queue, &pending_tx_info->req, 1345 pending_tx_info->extra_count, status); 1346 1347 /* Release the pending index before pusing the Tx response so 1348 * its available before a new Tx request is pushed by the 1349 * frontend. 1350 */ 1351 index = pending_index(queue->pending_prod++); 1352 queue->pending_ring[index] = pending_idx; 1353 1354 push_tx_responses(queue); 1355 1356 spin_unlock_irqrestore(&queue->response_lock, flags); 1357 } 1358 1359 1360 static void make_tx_response(struct xenvif_queue *queue, 1361 struct xen_netif_tx_request *txp, 1362 unsigned int extra_count, 1363 s8 st) 1364 { 1365 RING_IDX i = queue->tx.rsp_prod_pvt; 1366 struct xen_netif_tx_response *resp; 1367 1368 resp = RING_GET_RESPONSE(&queue->tx, i); 1369 resp->id = txp->id; 1370 resp->status = st; 1371 1372 while (extra_count-- != 0) 1373 RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; 1374 1375 queue->tx.rsp_prod_pvt = ++i; 1376 } 1377 1378 static void push_tx_responses(struct xenvif_queue *queue) 1379 { 1380 int notify; 1381 1382 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); 1383 if (notify) 1384 notify_remote_via_irq(queue->tx_irq); 1385 } 1386 1387 void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) 1388 { 1389 int ret; 1390 struct gnttab_unmap_grant_ref tx_unmap_op; 1391 1392 gnttab_set_unmap_op(&tx_unmap_op, 1393 idx_to_kaddr(queue, pending_idx), 1394 GNTMAP_host_map, 1395 queue->grant_tx_handle[pending_idx]); 1396 xenvif_grant_handle_reset(queue, pending_idx); 1397 1398 ret = gnttab_unmap_refs(&tx_unmap_op, NULL, 1399 &queue->mmap_pages[pending_idx], 1); 1400 if (ret) { 1401 netdev_err(queue->vif->dev, 1402 "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: 0x%x status: %d\n", 1403 ret, 1404 pending_idx, 1405 tx_unmap_op.host_addr, 1406 tx_unmap_op.handle, 1407 tx_unmap_op.status); 1408 BUG(); 1409 } 1410 } 1411 1412 static inline int tx_work_todo(struct xenvif_queue *queue) 1413 { 1414 if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx))) 1415 return 1; 1416 1417 return 0; 1418 } 1419 1420 static inline bool tx_dealloc_work_todo(struct xenvif_queue *queue) 1421 { 1422 return queue->dealloc_cons != queue->dealloc_prod; 1423 } 1424 1425 void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue) 1426 { 1427 if (queue->tx.sring) 1428 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif), 1429 queue->tx.sring); 1430 if (queue->rx.sring) 1431 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif), 1432 queue->rx.sring); 1433 } 1434 1435 int xenvif_map_frontend_data_rings(struct xenvif_queue *queue, 1436 grant_ref_t tx_ring_ref, 1437 grant_ref_t rx_ring_ref) 1438 { 1439 void *addr; 1440 struct xen_netif_tx_sring *txs; 1441 struct xen_netif_rx_sring *rxs; 1442 1443 int err = -ENOMEM; 1444 1445 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif), 1446 &tx_ring_ref, 1, &addr); 1447 if (err) 1448 goto err; 1449 1450 txs = (struct xen_netif_tx_sring *)addr; 1451 BACK_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE); 1452 1453 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif), 1454 &rx_ring_ref, 1, &addr); 1455 if (err) 1456 goto err; 1457 1458 rxs = (struct xen_netif_rx_sring *)addr; 1459 BACK_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE); 1460 1461 return 0; 1462 1463 err: 1464 xenvif_unmap_frontend_data_rings(queue); 1465 return err; 1466 } 1467 1468 static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue) 1469 { 1470 /* Dealloc thread must remain running until all inflight 1471 * packets complete. 1472 */ 1473 return kthread_should_stop() && 1474 !atomic_read(&queue->inflight_packets); 1475 } 1476 1477 int xenvif_dealloc_kthread(void *data) 1478 { 1479 struct xenvif_queue *queue = data; 1480 1481 for (;;) { 1482 wait_event_interruptible(queue->dealloc_wq, 1483 tx_dealloc_work_todo(queue) || 1484 xenvif_dealloc_kthread_should_stop(queue)); 1485 if (xenvif_dealloc_kthread_should_stop(queue)) 1486 break; 1487 1488 xenvif_tx_dealloc_action(queue); 1489 cond_resched(); 1490 } 1491 1492 /* Unmap anything remaining*/ 1493 if (tx_dealloc_work_todo(queue)) 1494 xenvif_tx_dealloc_action(queue); 1495 1496 return 0; 1497 } 1498 1499 static void make_ctrl_response(struct xenvif *vif, 1500 const struct xen_netif_ctrl_request *req, 1501 u32 status, u32 data) 1502 { 1503 RING_IDX idx = vif->ctrl.rsp_prod_pvt; 1504 struct xen_netif_ctrl_response rsp = { 1505 .id = req->id, 1506 .type = req->type, 1507 .status = status, 1508 .data = data, 1509 }; 1510 1511 *RING_GET_RESPONSE(&vif->ctrl, idx) = rsp; 1512 vif->ctrl.rsp_prod_pvt = ++idx; 1513 } 1514 1515 static void push_ctrl_response(struct xenvif *vif) 1516 { 1517 int notify; 1518 1519 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->ctrl, notify); 1520 if (notify) 1521 notify_remote_via_irq(vif->ctrl_irq); 1522 } 1523 1524 static void process_ctrl_request(struct xenvif *vif, 1525 const struct xen_netif_ctrl_request *req) 1526 { 1527 u32 status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED; 1528 u32 data = 0; 1529 1530 switch (req->type) { 1531 case XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM: 1532 status = xenvif_set_hash_alg(vif, req->data[0]); 1533 break; 1534 1535 case XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS: 1536 status = xenvif_get_hash_flags(vif, &data); 1537 break; 1538 1539 case XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS: 1540 status = xenvif_set_hash_flags(vif, req->data[0]); 1541 break; 1542 1543 case XEN_NETIF_CTRL_TYPE_SET_HASH_KEY: 1544 status = xenvif_set_hash_key(vif, req->data[0], 1545 req->data[1]); 1546 break; 1547 1548 case XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE: 1549 status = XEN_NETIF_CTRL_STATUS_SUCCESS; 1550 data = XEN_NETBK_MAX_HASH_MAPPING_SIZE; 1551 break; 1552 1553 case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE: 1554 status = xenvif_set_hash_mapping_size(vif, 1555 req->data[0]); 1556 break; 1557 1558 case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING: 1559 status = xenvif_set_hash_mapping(vif, req->data[0], 1560 req->data[1], 1561 req->data[2]); 1562 break; 1563 1564 default: 1565 break; 1566 } 1567 1568 make_ctrl_response(vif, req, status, data); 1569 push_ctrl_response(vif); 1570 } 1571 1572 static void xenvif_ctrl_action(struct xenvif *vif) 1573 { 1574 for (;;) { 1575 RING_IDX req_prod, req_cons; 1576 1577 req_prod = vif->ctrl.sring->req_prod; 1578 req_cons = vif->ctrl.req_cons; 1579 1580 /* Make sure we can see requests before we process them. */ 1581 rmb(); 1582 1583 if (req_cons == req_prod) 1584 break; 1585 1586 while (req_cons != req_prod) { 1587 struct xen_netif_ctrl_request req; 1588 1589 RING_COPY_REQUEST(&vif->ctrl, req_cons, &req); 1590 req_cons++; 1591 1592 process_ctrl_request(vif, &req); 1593 } 1594 1595 vif->ctrl.req_cons = req_cons; 1596 vif->ctrl.sring->req_event = req_cons + 1; 1597 } 1598 } 1599 1600 static bool xenvif_ctrl_work_todo(struct xenvif *vif) 1601 { 1602 if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->ctrl))) 1603 return 1; 1604 1605 return 0; 1606 } 1607 1608 irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data) 1609 { 1610 struct xenvif *vif = data; 1611 1612 while (xenvif_ctrl_work_todo(vif)) 1613 xenvif_ctrl_action(vif); 1614 1615 return IRQ_HANDLED; 1616 } 1617 1618 static int __init netback_init(void) 1619 { 1620 int rc = 0; 1621 1622 if (!xen_domain()) 1623 return -ENODEV; 1624 1625 /* Allow as many queues as there are CPUs if user has not 1626 * specified a value. 1627 */ 1628 if (xenvif_max_queues == 0) 1629 xenvif_max_queues = num_online_cpus(); 1630 1631 if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) { 1632 pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", 1633 fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX); 1634 fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX; 1635 } 1636 1637 rc = xenvif_xenbus_init(); 1638 if (rc) 1639 goto failed_init; 1640 1641 #ifdef CONFIG_DEBUG_FS 1642 xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL); 1643 if (IS_ERR_OR_NULL(xen_netback_dbg_root)) 1644 pr_warn("Init of debugfs returned %ld!\n", 1645 PTR_ERR(xen_netback_dbg_root)); 1646 #endif /* CONFIG_DEBUG_FS */ 1647 1648 return 0; 1649 1650 failed_init: 1651 return rc; 1652 } 1653 1654 module_init(netback_init); 1655 1656 static void __exit netback_fini(void) 1657 { 1658 #ifdef CONFIG_DEBUG_FS 1659 if (!IS_ERR_OR_NULL(xen_netback_dbg_root)) 1660 debugfs_remove_recursive(xen_netback_dbg_root); 1661 #endif /* CONFIG_DEBUG_FS */ 1662 xenvif_xenbus_fini(); 1663 } 1664 module_exit(netback_fini); 1665 1666 MODULE_LICENSE("Dual BSD/GPL"); 1667 MODULE_ALIAS("xen-backend:vif"); 1668