1 /* 2 * drivers/net/veth.c 3 * 4 * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc 5 * 6 * Author: Pavel Emelianov <xemul@openvz.org> 7 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com> 8 * 9 */ 10 11 #include <linux/netdevice.h> 12 #include <linux/slab.h> 13 #include <linux/ethtool.h> 14 #include <linux/etherdevice.h> 15 #include <linux/u64_stats_sync.h> 16 17 #include <net/rtnetlink.h> 18 #include <net/dst.h> 19 #include <net/xfrm.h> 20 #include <linux/veth.h> 21 #include <linux/module.h> 22 23 #define DRV_NAME "veth" 24 #define DRV_VERSION "1.0" 25 26 struct pcpu_vstats { 27 u64 packets; 28 u64 bytes; 29 struct u64_stats_sync syncp; 30 }; 31 32 struct veth_priv { 33 struct net_device __rcu *peer; 34 atomic64_t dropped; 35 unsigned requested_headroom; 36 }; 37 38 /* 39 * ethtool interface 40 */ 41 42 static struct { 43 const char string[ETH_GSTRING_LEN]; 44 } ethtool_stats_keys[] = { 45 { "peer_ifindex" }, 46 }; 47 48 static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) 49 { 50 cmd->supported = 0; 51 cmd->advertising = 0; 52 ethtool_cmd_speed_set(cmd, SPEED_10000); 53 cmd->duplex = DUPLEX_FULL; 54 cmd->port = PORT_TP; 55 cmd->phy_address = 0; 56 cmd->transceiver = XCVR_INTERNAL; 57 cmd->autoneg = AUTONEG_DISABLE; 58 cmd->maxtxpkt = 0; 59 cmd->maxrxpkt = 0; 60 return 0; 61 } 62 63 static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 64 { 65 strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); 66 strlcpy(info->version, DRV_VERSION, sizeof(info->version)); 67 } 68 69 static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf) 70 { 71 switch(stringset) { 72 case ETH_SS_STATS: 73 memcpy(buf, ðtool_stats_keys, sizeof(ethtool_stats_keys)); 74 break; 75 } 76 } 77 78 static int veth_get_sset_count(struct net_device *dev, int sset) 79 { 80 switch (sset) { 81 case ETH_SS_STATS: 82 return ARRAY_SIZE(ethtool_stats_keys); 83 default: 84 return -EOPNOTSUPP; 85 } 86 } 87 88 static void veth_get_ethtool_stats(struct net_device *dev, 89 struct ethtool_stats *stats, u64 *data) 90 { 91 struct veth_priv *priv = netdev_priv(dev); 92 struct net_device *peer = rtnl_dereference(priv->peer); 93 94 data[0] = peer ? peer->ifindex : 0; 95 } 96 97 static const struct ethtool_ops veth_ethtool_ops = { 98 .get_settings = veth_get_settings, 99 .get_drvinfo = veth_get_drvinfo, 100 .get_link = ethtool_op_get_link, 101 .get_strings = veth_get_strings, 102 .get_sset_count = veth_get_sset_count, 103 .get_ethtool_stats = veth_get_ethtool_stats, 104 }; 105 106 static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) 107 { 108 struct veth_priv *priv = netdev_priv(dev); 109 struct net_device *rcv; 110 int length = skb->len; 111 112 rcu_read_lock(); 113 rcv = rcu_dereference(priv->peer); 114 if (unlikely(!rcv)) { 115 kfree_skb(skb); 116 goto drop; 117 } 118 119 if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { 120 struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); 121 122 u64_stats_update_begin(&stats->syncp); 123 stats->bytes += length; 124 stats->packets++; 125 u64_stats_update_end(&stats->syncp); 126 } else { 127 drop: 128 atomic64_inc(&priv->dropped); 129 } 130 rcu_read_unlock(); 131 return NETDEV_TX_OK; 132 } 133 134 /* 135 * general routines 136 */ 137 138 static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev) 139 { 140 struct veth_priv *priv = netdev_priv(dev); 141 int cpu; 142 143 result->packets = 0; 144 result->bytes = 0; 145 for_each_possible_cpu(cpu) { 146 struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu); 147 u64 packets, bytes; 148 unsigned int start; 149 150 do { 151 start = u64_stats_fetch_begin_irq(&stats->syncp); 152 packets = stats->packets; 153 bytes = stats->bytes; 154 } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); 155 result->packets += packets; 156 result->bytes += bytes; 157 } 158 return atomic64_read(&priv->dropped); 159 } 160 161 static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev, 162 struct rtnl_link_stats64 *tot) 163 { 164 struct veth_priv *priv = netdev_priv(dev); 165 struct net_device *peer; 166 struct pcpu_vstats one; 167 168 tot->tx_dropped = veth_stats_one(&one, dev); 169 tot->tx_bytes = one.bytes; 170 tot->tx_packets = one.packets; 171 172 rcu_read_lock(); 173 peer = rcu_dereference(priv->peer); 174 if (peer) { 175 tot->rx_dropped = veth_stats_one(&one, peer); 176 tot->rx_bytes = one.bytes; 177 tot->rx_packets = one.packets; 178 } 179 rcu_read_unlock(); 180 181 return tot; 182 } 183 184 /* fake multicast ability */ 185 static void veth_set_multicast_list(struct net_device *dev) 186 { 187 } 188 189 static int veth_open(struct net_device *dev) 190 { 191 struct veth_priv *priv = netdev_priv(dev); 192 struct net_device *peer = rtnl_dereference(priv->peer); 193 194 if (!peer) 195 return -ENOTCONN; 196 197 if (peer->flags & IFF_UP) { 198 netif_carrier_on(dev); 199 netif_carrier_on(peer); 200 } 201 return 0; 202 } 203 204 static int veth_close(struct net_device *dev) 205 { 206 struct veth_priv *priv = netdev_priv(dev); 207 struct net_device *peer = rtnl_dereference(priv->peer); 208 209 netif_carrier_off(dev); 210 if (peer) 211 netif_carrier_off(peer); 212 213 return 0; 214 } 215 216 static int is_valid_veth_mtu(int mtu) 217 { 218 return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU; 219 } 220 221 static int veth_dev_init(struct net_device *dev) 222 { 223 dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats); 224 if (!dev->vstats) 225 return -ENOMEM; 226 return 0; 227 } 228 229 static void veth_dev_free(struct net_device *dev) 230 { 231 free_percpu(dev->vstats); 232 free_netdev(dev); 233 } 234 235 #ifdef CONFIG_NET_POLL_CONTROLLER 236 static void veth_poll_controller(struct net_device *dev) 237 { 238 /* veth only receives frames when its peer sends one 239 * Since it's a synchronous operation, we are guaranteed 240 * never to have pending data when we poll for it so 241 * there is nothing to do here. 242 * 243 * We need this though so netpoll recognizes us as an interface that 244 * supports polling, which enables bridge devices in virt setups to 245 * still use netconsole 246 */ 247 } 248 #endif /* CONFIG_NET_POLL_CONTROLLER */ 249 250 static int veth_get_iflink(const struct net_device *dev) 251 { 252 struct veth_priv *priv = netdev_priv(dev); 253 struct net_device *peer; 254 int iflink; 255 256 rcu_read_lock(); 257 peer = rcu_dereference(priv->peer); 258 iflink = peer ? peer->ifindex : 0; 259 rcu_read_unlock(); 260 261 return iflink; 262 } 263 264 static void veth_set_rx_headroom(struct net_device *dev, int new_hr) 265 { 266 struct veth_priv *peer_priv, *priv = netdev_priv(dev); 267 struct net_device *peer; 268 269 if (new_hr < 0) 270 new_hr = 0; 271 272 rcu_read_lock(); 273 peer = rcu_dereference(priv->peer); 274 if (unlikely(!peer)) 275 goto out; 276 277 peer_priv = netdev_priv(peer); 278 priv->requested_headroom = new_hr; 279 new_hr = max(priv->requested_headroom, peer_priv->requested_headroom); 280 dev->needed_headroom = new_hr; 281 peer->needed_headroom = new_hr; 282 283 out: 284 rcu_read_unlock(); 285 } 286 287 static const struct net_device_ops veth_netdev_ops = { 288 .ndo_init = veth_dev_init, 289 .ndo_open = veth_open, 290 .ndo_stop = veth_close, 291 .ndo_start_xmit = veth_xmit, 292 .ndo_get_stats64 = veth_get_stats64, 293 .ndo_set_rx_mode = veth_set_multicast_list, 294 .ndo_set_mac_address = eth_mac_addr, 295 #ifdef CONFIG_NET_POLL_CONTROLLER 296 .ndo_poll_controller = veth_poll_controller, 297 #endif 298 .ndo_get_iflink = veth_get_iflink, 299 .ndo_features_check = passthru_features_check, 300 .ndo_set_rx_headroom = veth_set_rx_headroom, 301 }; 302 303 #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ 304 NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \ 305 NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ 306 NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ 307 NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) 308 309 static void veth_setup(struct net_device *dev) 310 { 311 ether_setup(dev); 312 313 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 314 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 315 dev->priv_flags |= IFF_NO_QUEUE; 316 dev->priv_flags |= IFF_PHONY_HEADROOM; 317 318 dev->netdev_ops = &veth_netdev_ops; 319 dev->ethtool_ops = &veth_ethtool_ops; 320 dev->features |= NETIF_F_LLTX; 321 dev->features |= VETH_FEATURES; 322 dev->vlan_features = dev->features & 323 ~(NETIF_F_HW_VLAN_CTAG_TX | 324 NETIF_F_HW_VLAN_STAG_TX | 325 NETIF_F_HW_VLAN_CTAG_RX | 326 NETIF_F_HW_VLAN_STAG_RX); 327 dev->destructor = veth_dev_free; 328 dev->max_mtu = ETH_MAX_MTU; 329 330 dev->hw_features = VETH_FEATURES; 331 dev->hw_enc_features = VETH_FEATURES; 332 dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; 333 } 334 335 /* 336 * netlink interface 337 */ 338 339 static int veth_validate(struct nlattr *tb[], struct nlattr *data[]) 340 { 341 if (tb[IFLA_ADDRESS]) { 342 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 343 return -EINVAL; 344 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 345 return -EADDRNOTAVAIL; 346 } 347 if (tb[IFLA_MTU]) { 348 if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU]))) 349 return -EINVAL; 350 } 351 return 0; 352 } 353 354 static struct rtnl_link_ops veth_link_ops; 355 356 static int veth_newlink(struct net *src_net, struct net_device *dev, 357 struct nlattr *tb[], struct nlattr *data[]) 358 { 359 int err; 360 struct net_device *peer; 361 struct veth_priv *priv; 362 char ifname[IFNAMSIZ]; 363 struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; 364 unsigned char name_assign_type; 365 struct ifinfomsg *ifmp; 366 struct net *net; 367 368 /* 369 * create and register peer first 370 */ 371 if (data != NULL && data[VETH_INFO_PEER] != NULL) { 372 struct nlattr *nla_peer; 373 374 nla_peer = data[VETH_INFO_PEER]; 375 ifmp = nla_data(nla_peer); 376 err = rtnl_nla_parse_ifla(peer_tb, 377 nla_data(nla_peer) + sizeof(struct ifinfomsg), 378 nla_len(nla_peer) - sizeof(struct ifinfomsg)); 379 if (err < 0) 380 return err; 381 382 err = veth_validate(peer_tb, NULL); 383 if (err < 0) 384 return err; 385 386 tbp = peer_tb; 387 } else { 388 ifmp = NULL; 389 tbp = tb; 390 } 391 392 if (tbp[IFLA_IFNAME]) { 393 nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); 394 name_assign_type = NET_NAME_USER; 395 } else { 396 snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); 397 name_assign_type = NET_NAME_ENUM; 398 } 399 400 net = rtnl_link_get_net(src_net, tbp); 401 if (IS_ERR(net)) 402 return PTR_ERR(net); 403 404 peer = rtnl_create_link(net, ifname, name_assign_type, 405 &veth_link_ops, tbp); 406 if (IS_ERR(peer)) { 407 put_net(net); 408 return PTR_ERR(peer); 409 } 410 411 if (tbp[IFLA_ADDRESS] == NULL) 412 eth_hw_addr_random(peer); 413 414 if (ifmp && (dev->ifindex != 0)) 415 peer->ifindex = ifmp->ifi_index; 416 417 err = register_netdevice(peer); 418 put_net(net); 419 net = NULL; 420 if (err < 0) 421 goto err_register_peer; 422 423 netif_carrier_off(peer); 424 425 err = rtnl_configure_link(peer, ifmp); 426 if (err < 0) 427 goto err_configure_peer; 428 429 /* 430 * register dev last 431 * 432 * note, that since we've registered new device the dev's name 433 * should be re-allocated 434 */ 435 436 if (tb[IFLA_ADDRESS] == NULL) 437 eth_hw_addr_random(dev); 438 439 if (tb[IFLA_IFNAME]) 440 nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); 441 else 442 snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); 443 444 err = register_netdevice(dev); 445 if (err < 0) 446 goto err_register_dev; 447 448 netif_carrier_off(dev); 449 450 /* 451 * tie the deviced together 452 */ 453 454 priv = netdev_priv(dev); 455 rcu_assign_pointer(priv->peer, peer); 456 457 priv = netdev_priv(peer); 458 rcu_assign_pointer(priv->peer, dev); 459 return 0; 460 461 err_register_dev: 462 /* nothing to do */ 463 err_configure_peer: 464 unregister_netdevice(peer); 465 return err; 466 467 err_register_peer: 468 free_netdev(peer); 469 return err; 470 } 471 472 static void veth_dellink(struct net_device *dev, struct list_head *head) 473 { 474 struct veth_priv *priv; 475 struct net_device *peer; 476 477 priv = netdev_priv(dev); 478 peer = rtnl_dereference(priv->peer); 479 480 /* Note : dellink() is called from default_device_exit_batch(), 481 * before a rcu_synchronize() point. The devices are guaranteed 482 * not being freed before one RCU grace period. 483 */ 484 RCU_INIT_POINTER(priv->peer, NULL); 485 unregister_netdevice_queue(dev, head); 486 487 if (peer) { 488 priv = netdev_priv(peer); 489 RCU_INIT_POINTER(priv->peer, NULL); 490 unregister_netdevice_queue(peer, head); 491 } 492 } 493 494 static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = { 495 [VETH_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, 496 }; 497 498 static struct net *veth_get_link_net(const struct net_device *dev) 499 { 500 struct veth_priv *priv = netdev_priv(dev); 501 struct net_device *peer = rtnl_dereference(priv->peer); 502 503 return peer ? dev_net(peer) : dev_net(dev); 504 } 505 506 static struct rtnl_link_ops veth_link_ops = { 507 .kind = DRV_NAME, 508 .priv_size = sizeof(struct veth_priv), 509 .setup = veth_setup, 510 .validate = veth_validate, 511 .newlink = veth_newlink, 512 .dellink = veth_dellink, 513 .policy = veth_policy, 514 .maxtype = VETH_INFO_MAX, 515 .get_link_net = veth_get_link_net, 516 }; 517 518 /* 519 * init/fini 520 */ 521 522 static __init int veth_init(void) 523 { 524 return rtnl_link_register(&veth_link_ops); 525 } 526 527 static __exit void veth_exit(void) 528 { 529 rtnl_link_unregister(&veth_link_ops); 530 } 531 532 module_init(veth_init); 533 module_exit(veth_exit); 534 535 MODULE_DESCRIPTION("Virtual Ethernet Tunnel"); 536 MODULE_LICENSE("GPL v2"); 537 MODULE_ALIAS_RTNL_LINK(DRV_NAME); 538