/*
 * Network-device interface management.
 *
 * Copyright (c) 2004-2005, Keir Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "common.h"

#include <linux/kthread.h>
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>

#include <xen/events.h>
#include <asm/xen/hypercall.h>
#include <xen/balloon.h>

#define XENVIF_QUEUE_LENGTH 32
#define XENVIF_NAPI_WEIGHT  64

int xenvif_schedulable(struct xenvif *vif)
{
	return netif_running(vif->dev) && netif_carrier_ok(vif->dev);
}

static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
{
	struct xenvif *vif = dev_id;

	if (RING_HAS_UNCONSUMED_REQUESTS(&vif->tx))
		napi_schedule(&vif->napi);

	return IRQ_HANDLED;
}
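
/* NAPI poll handler for the TX ring. TX interrupts schedule NAPI, and this
 * routine consumes up to @budget transmit requests from the frontend via
 * xenvif_tx_action() before deciding whether to complete polling.
 */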
static int xenvif_poll(struct napi_struct *napi, int budget)
{
	struct xenvif *vif = container_of(napi, struct xenvif, napi);
	int work_done;

	/* This vif is rogue, we pretend there is nothing to do for this
	 * vif to deschedule it from NAPI. But this interface will be
	 * turned off in thread context later.
	 */
	if (unlikely(vif->disabled)) {
		napi_complete(napi);
		return 0;
	}

	work_done = xenvif_tx_action(vif, budget);

	if (work_done < budget) {
		int more_to_do = 0;
		unsigned long flags;

		/* It is necessary to disable IRQs before calling
		 * RING_HAS_UNCONSUMED_REQUESTS. Otherwise we might
		 * lose an event from the frontend.
		 *
		 * Consider:
		 *   RING_HAS_UNCONSUMED_REQUESTS
		 *   <frontend generates event to trigger napi_schedule>
		 *   __napi_complete
		 *
		 * This handler is still in scheduled state so the
		 * event has no effect at all. After __napi_complete
		 * this handler is descheduled and cannot get
		 * scheduled again. We lose the event in this case and
		 * the ring will be completely stalled.
		 */

		local_irq_save(flags);

		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
		if (!more_to_do)
			__napi_complete(napi);

		local_irq_restore(flags);
	}

	return work_done;
}

static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
{
	struct xenvif *vif = dev_id;

	xenvif_kick_thread(vif);

	return IRQ_HANDLED;
}

static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
{
	xenvif_tx_interrupt(irq, dev_id);
	xenvif_rx_interrupt(irq, dev_id);

	return IRQ_HANDLED;
}

static void xenvif_wake_queue(unsigned long data)
{
	struct xenvif *vif = (struct xenvif *)data;

	if (netif_queue_stopped(vif->dev)) {
		netdev_err(vif->dev, "draining TX queue\n");
		vif->rx_queue_purge = true;
		xenvif_kick_thread(vif);
		netif_wake_queue(vif->dev);
	}
}

static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct xenvif *vif = netdev_priv(dev);
	int min_slots_needed;

	BUG_ON(skb->dev != dev);

	/* Drop the packet if vif is not ready */
	if (vif->task == NULL ||
	    vif->dealloc_task == NULL ||
	    !xenvif_schedulable(vif))
		goto drop;

	/* At best we'll need one slot for the header and one for each
	 * frag.
	 */
	min_slots_needed = 1 + skb_shinfo(skb)->nr_frags;

	/* If the skb is GSO then we'll also need an extra slot for the
	 * metadata.
	 */
	if (skb_is_gso(skb))
		min_slots_needed++;

	/* If the skb can't possibly fit in the remaining slots
	 * then turn off the queue to give the ring a chance to
	 * drain.
	 */
	if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) {
		vif->wake_queue.function = xenvif_wake_queue;
		vif->wake_queue.data = (unsigned long)vif;
		xenvif_stop_queue(vif);
		mod_timer(&vif->wake_queue,
			  jiffies + rx_drain_timeout_jiffies);
	}

	skb_queue_tail(&vif->rx_queue, skb);
	xenvif_kick_thread(vif);

	return NETDEV_TX_OK;

 drop:
	vif->dev->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
{
	struct xenvif *vif = netdev_priv(dev);
	return &vif->dev->stats;
}

static void xenvif_up(struct xenvif *vif)
{
	napi_enable(&vif->napi);
	enable_irq(vif->tx_irq);
	if (vif->tx_irq != vif->rx_irq)
		enable_irq(vif->rx_irq);
	xenvif_check_rx_xenvif(vif);
}

static void xenvif_down(struct xenvif *vif)
{
	napi_disable(&vif->napi);
	disable_irq(vif->tx_irq);
	if (vif->tx_irq != vif->rx_irq)
		disable_irq(vif->rx_irq);
	del_timer_sync(&vif->credit_timeout);
}

static int xenvif_open(struct net_device *dev)
{
	struct xenvif *vif = netdev_priv(dev);
	if (netif_carrier_ok(dev))
		xenvif_up(vif);
	netif_start_queue(dev);
	return 0;
}

static int xenvif_close(struct net_device *dev)
{
	struct xenvif *vif = netdev_priv(dev);
	if (netif_carrier_ok(dev))
		xenvif_down(vif);
	netif_stop_queue(dev);
	return 0;
}
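
/* When the frontend supports scatter-gather the backend accepts frames up to
 * the 65535-byte IP maximum (less the VLAN Ethernet header); otherwise the
 * MTU is capped at the standard ETH_DATA_LEN.
 */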
static int xenvif_change_mtu(struct net_device *dev, int mtu)
{
	struct xenvif *vif = netdev_priv(dev);
	int max = vif->can_sg ? 65535 - VLAN_ETH_HLEN : ETH_DATA_LEN;

	if (mtu > max)
		return -EINVAL;
	dev->mtu = mtu;
	return 0;
}

static netdev_features_t xenvif_fix_features(struct net_device *dev,
					     netdev_features_t features)
{
	struct xenvif *vif = netdev_priv(dev);

	if (!vif->can_sg)
		features &= ~NETIF_F_SG;
	if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV4))
		features &= ~NETIF_F_TSO;
	if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV6))
		features &= ~NETIF_F_TSO6;
	if (!vif->ip_csum)
		features &= ~NETIF_F_IP_CSUM;
	if (!vif->ipv6_csum)
		features &= ~NETIF_F_IPV6_CSUM;

	return features;
}

static const struct xenvif_stat {
	char name[ETH_GSTRING_LEN];
	u16 offset;
} xenvif_stats[] = {
	{
		"rx_gso_checksum_fixup",
		offsetof(struct xenvif, rx_gso_checksum_fixup)
	},
	/* If (sent != success + fail), there are probably packets never
	 * freed up properly!
	 */
	{
		"tx_zerocopy_sent",
		offsetof(struct xenvif, tx_zerocopy_sent),
	},
	{
		"tx_zerocopy_success",
		offsetof(struct xenvif, tx_zerocopy_success),
	},
	{
		"tx_zerocopy_fail",
		offsetof(struct xenvif, tx_zerocopy_fail)
	},
	/* Number of packets exceeding MAX_SKB_FRAGS slots. You should use
	 * a guest with the same MAX_SKB_FRAGS.
	 */
	{
		"tx_frag_overflow",
		offsetof(struct xenvif, tx_frag_overflow)
	},
};

static int xenvif_get_sset_count(struct net_device *dev, int string_set)
{
	switch (string_set) {
	case ETH_SS_STATS:
		return ARRAY_SIZE(xenvif_stats);
	default:
		return -EINVAL;
	}
}

static void xenvif_get_ethtool_stats(struct net_device *dev,
				     struct ethtool_stats *stats, u64 *data)
{
	void *vif = netdev_priv(dev);
	int i;

	for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++)
		data[i] = *(unsigned long *)(vif + xenvif_stats[i].offset);
}

static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
	int i;

	switch (stringset) {
	case ETH_SS_STATS:
		for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++)
			memcpy(data + i * ETH_GSTRING_LEN,
			       xenvif_stats[i].name, ETH_GSTRING_LEN);
		break;
	}
}

static const struct ethtool_ops xenvif_ethtool_ops = {
	.get_link	= ethtool_op_get_link,

	.get_sset_count = xenvif_get_sset_count,
	.get_ethtool_stats = xenvif_get_ethtool_stats,
	.get_strings = xenvif_get_strings,
};

static const struct net_device_ops xenvif_netdev_ops = {
	.ndo_start_xmit	= xenvif_start_xmit,
	.ndo_get_stats	= xenvif_get_stats,
	.ndo_open	= xenvif_open,
	.ndo_stop	= xenvif_close,
	.ndo_change_mtu	= xenvif_change_mtu,
	.ndo_fix_features = xenvif_fix_features,
	.ndo_set_mac_address = eth_mac_addr,
	.ndo_validate_addr   = eth_validate_addr,
};
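
/* Allocate and register the "vif<domid>.<handle>" net_device together with
 * its struct xenvif private state. The device starts with the carrier off;
 * xenvif_connect() later attaches the frontend rings and event channels and
 * brings the interface up.
 */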
struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
			    unsigned int handle)
{
	int err;
	struct net_device *dev;
	struct xenvif *vif;
	char name[IFNAMSIZ] = {};
	int i;

	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
	dev = alloc_netdev(sizeof(struct xenvif), name, ether_setup);
	if (dev == NULL) {
		pr_warn("Could not allocate netdev for %s\n", name);
		return ERR_PTR(-ENOMEM);
	}

	SET_NETDEV_DEV(dev, parent);

	vif = netdev_priv(dev);

	vif->grant_copy_op = vmalloc(sizeof(struct gnttab_copy) *
				     MAX_GRANT_COPY_OPS);
	if (vif->grant_copy_op == NULL) {
		pr_warn("Could not allocate grant copy space for %s\n", name);
		free_netdev(dev);
		return ERR_PTR(-ENOMEM);
	}

	vif->domid  = domid;
	vif->handle = handle;
	vif->can_sg = 1;
	vif->ip_csum = 1;
	vif->dev = dev;

	vif->disabled = false;

	vif->credit_bytes = vif->remaining_credit = ~0UL;
	vif->credit_usec  = 0UL;
	init_timer(&vif->credit_timeout);
	vif->credit_window_start = get_jiffies_64();

	init_timer(&vif->wake_queue);

	dev->netdev_ops	= &xenvif_netdev_ops;
	dev->hw_features = NETIF_F_SG |
		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
		NETIF_F_TSO | NETIF_F_TSO6;
	dev->features = dev->hw_features | NETIF_F_RXCSUM;
	SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops);

	dev->tx_queue_len = XENVIF_QUEUE_LENGTH;

	skb_queue_head_init(&vif->rx_queue);
	skb_queue_head_init(&vif->tx_queue);

	vif->pending_cons = 0;
	vif->pending_prod = MAX_PENDING_REQS;
	for (i = 0; i < MAX_PENDING_REQS; i++)
		vif->pending_ring[i] = i;
	spin_lock_init(&vif->callback_lock);
	spin_lock_init(&vif->response_lock);
	/* If ballooning is disabled, this will consume real memory, so you
	 * had better enable it. The long term solution would be to use just a
	 * bunch of valid page descriptors, without dependency on ballooning.
	 */
	err = alloc_xenballooned_pages(MAX_PENDING_REQS,
				       vif->mmap_pages,
				       false);
	if (err) {
		netdev_err(dev, "Could not reserve mmap_pages\n");
		vfree(vif->grant_copy_op);
		free_netdev(dev);
		return ERR_PTR(-ENOMEM);
	}
	for (i = 0; i < MAX_PENDING_REQS; i++) {
		vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
			{ .callback = xenvif_zerocopy_callback,
			  .ctx = NULL,
			  .desc = i };
		vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
	}

	/*
	 * Initialise a dummy MAC address. We choose the numerically
	 * largest non-broadcast address to prevent the address getting
	 * stolen by an Ethernet bridge for STP purposes.
	 * (FE:FF:FF:FF:FF:FF)
	 */
	memset(dev->dev_addr, 0xFF, ETH_ALEN);
	dev->dev_addr[0] &= ~0x01;

	netif_napi_add(dev, &vif->napi, xenvif_poll, XENVIF_NAPI_WEIGHT);

	netif_carrier_off(dev);

	err = register_netdev(dev);
	if (err) {
		netdev_warn(dev, "Could not register device: err=%d\n", err);
		free_netdev(dev);
		return ERR_PTR(err);
	}

	netdev_dbg(dev, "Successfully created xenvif\n");

	__module_get(THIS_MODULE);

	return vif;
}
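
/* Connect the vif to its frontend: map the shared TX/RX rings, bind the
 * (optionally split) event channels, and start the per-vif guest-rx and
 * dealloc kthreads. On success the carrier is turned on and the interface is
 * brought up if it is administratively running.
 */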
int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
		   unsigned long rx_ring_ref, unsigned int tx_evtchn,
		   unsigned int rx_evtchn)
{
	struct task_struct *task;
	int err = -ENOMEM;

	BUG_ON(vif->tx_irq);
	BUG_ON(vif->task);
	BUG_ON(vif->dealloc_task);

	err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
	if (err < 0)
		goto err;

	init_waitqueue_head(&vif->wq);
	init_waitqueue_head(&vif->dealloc_wq);

	if (tx_evtchn == rx_evtchn) {
		/* feature-split-event-channels == 0 */
		err = bind_interdomain_evtchn_to_irqhandler(
			vif->domid, tx_evtchn, xenvif_interrupt, 0,
			vif->dev->name, vif);
		if (err < 0)
			goto err_unmap;
		vif->tx_irq = vif->rx_irq = err;
		disable_irq(vif->tx_irq);
	} else {
		/* feature-split-event-channels == 1 */
		snprintf(vif->tx_irq_name, sizeof(vif->tx_irq_name),
			 "%s-tx", vif->dev->name);
		err = bind_interdomain_evtchn_to_irqhandler(
			vif->domid, tx_evtchn, xenvif_tx_interrupt, 0,
			vif->tx_irq_name, vif);
		if (err < 0)
			goto err_unmap;
		vif->tx_irq = err;
		disable_irq(vif->tx_irq);

		snprintf(vif->rx_irq_name, sizeof(vif->rx_irq_name),
			 "%s-rx", vif->dev->name);
		err = bind_interdomain_evtchn_to_irqhandler(
			vif->domid, rx_evtchn, xenvif_rx_interrupt, 0,
			vif->rx_irq_name, vif);
		if (err < 0)
			goto err_tx_unbind;
		vif->rx_irq = err;
		disable_irq(vif->rx_irq);
	}

	task = kthread_create(xenvif_kthread_guest_rx,
			      (void *)vif, "%s-guest-rx", vif->dev->name);
	if (IS_ERR(task)) {
		pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
		err = PTR_ERR(task);
		goto err_rx_unbind;
	}

	vif->task = task;

	task = kthread_create(xenvif_dealloc_kthread,
			      (void *)vif, "%s-dealloc", vif->dev->name);
	if (IS_ERR(task)) {
		pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
		err = PTR_ERR(task);
		goto err_rx_unbind;
	}

	vif->dealloc_task = task;

	rtnl_lock();
	if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
		dev_set_mtu(vif->dev, ETH_DATA_LEN);
	netdev_update_features(vif->dev);
	netif_carrier_on(vif->dev);
	if (netif_running(vif->dev))
		xenvif_up(vif);
	rtnl_unlock();

	wake_up_process(vif->task);
	wake_up_process(vif->dealloc_task);

	return 0;

err_rx_unbind:
	unbind_from_irqhandler(vif->rx_irq, vif);
	vif->rx_irq = 0;
err_tx_unbind:
	unbind_from_irqhandler(vif->tx_irq, vif);
	vif->tx_irq = 0;
err_unmap:
	xenvif_unmap_frontend_rings(vif);
err:
	module_put(THIS_MODULE);
	return err;
}

void xenvif_carrier_off(struct xenvif *vif)
{
	struct net_device *dev = vif->dev;

	rtnl_lock();
	netif_carrier_off(dev); /* discard queued packets */
	if (netif_running(dev))
		xenvif_down(vif);
	rtnl_unlock();
}
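
/* Undo xenvif_connect(): take the carrier down, stop the per-vif kthreads,
 * unbind the event channel IRQs and unmap the frontend rings. The net_device
 * itself is released later by xenvif_free().
 */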
void xenvif_disconnect(struct xenvif *vif)
{
	if (netif_carrier_ok(vif->dev))
		xenvif_carrier_off(vif);

	if (vif->task) {
		del_timer_sync(&vif->wake_queue);
		kthread_stop(vif->task);
		vif->task = NULL;
	}

	if (vif->dealloc_task) {
		kthread_stop(vif->dealloc_task);
		vif->dealloc_task = NULL;
	}

	if (vif->tx_irq) {
		if (vif->tx_irq == vif->rx_irq)
			unbind_from_irqhandler(vif->tx_irq, vif);
		else {
			unbind_from_irqhandler(vif->tx_irq, vif);
			unbind_from_irqhandler(vif->rx_irq, vif);
		}
		vif->tx_irq = 0;
	}

	xenvif_unmap_frontend_rings(vif);
}

void xenvif_free(struct xenvif *vif)
{
	int i, unmap_timeout = 0;
	/* Here we want to avoid timeout messages if an skb can be legitimately
	 * stuck somewhere else. Realistically this could be another vif's
	 * internal queue or its QDisc queue. That other vif also has the same
	 * rx_drain_timeout_msecs timeout, but its timer only ditches the
	 * internal queue. After that, the QDisc queue can put in worst case
	 * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that other vif's
	 * internal queue, so we need several rounds of such timeouts until we
	 * can be sure that no other vif still holds skbs from us. We are
	 * not sending more skbs, so newly stuck packets are not interesting
	 * for us here.
	 */
	unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) *
		DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));

	for (i = 0; i < MAX_PENDING_REQS; ++i) {
		if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
			unmap_timeout++;
			schedule_timeout(msecs_to_jiffies(1000));
			if (unmap_timeout > worst_case_skb_lifetime &&
			    net_ratelimit())
				netdev_err(vif->dev,
					   "Page still granted! Index: %x\n",
					   i);
			/* If there are pages that have still not been unmapped,
			 * reset the loop to start checking again. We shouldn't
			 * exit here until the dealloc thread and NAPI instance
			 * release all the pages. If a kernel bug causes the
			 * skbs to stall somewhere, the interface cannot be
			 * brought down properly.
			 */
			i = -1;
		}
	}

	free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages);

	netif_napi_del(&vif->napi);

	unregister_netdev(vif->dev);

	vfree(vif->grant_copy_op);
	free_netdev(vif->dev);

	module_put(THIS_MODULE);
}