1 /* 2 * drivers/net/veth.c 3 * 4 * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc 5 * 6 * Author: Pavel Emelianov <xemul@openvz.org> 7 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com> 8 * 9 */ 10 11 #include <linux/netdevice.h> 12 #include <linux/slab.h> 13 #include <linux/ethtool.h> 14 #include <linux/etherdevice.h> 15 #include <linux/u64_stats_sync.h> 16 17 #include <net/rtnetlink.h> 18 #include <net/dst.h> 19 #include <net/xfrm.h> 20 #include <linux/veth.h> 21 #include <linux/module.h> 22 23 #define DRV_NAME "veth" 24 #define DRV_VERSION "1.0" 25 26 struct pcpu_vstats { 27 u64 packets; 28 u64 bytes; 29 struct u64_stats_sync syncp; 30 }; 31 32 struct veth_priv { 33 struct net_device __rcu *peer; 34 atomic64_t dropped; 35 unsigned requested_headroom; 36 }; 37 38 /* 39 * ethtool interface 40 */ 41 42 static struct { 43 const char string[ETH_GSTRING_LEN]; 44 } ethtool_stats_keys[] = { 45 { "peer_ifindex" }, 46 }; 47 48 static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) 49 { 50 cmd->supported = 0; 51 cmd->advertising = 0; 52 ethtool_cmd_speed_set(cmd, SPEED_10000); 53 cmd->duplex = DUPLEX_FULL; 54 cmd->port = PORT_TP; 55 cmd->phy_address = 0; 56 cmd->transceiver = XCVR_INTERNAL; 57 cmd->autoneg = AUTONEG_DISABLE; 58 cmd->maxtxpkt = 0; 59 cmd->maxrxpkt = 0; 60 return 0; 61 } 62 63 static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 64 { 65 strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); 66 strlcpy(info->version, DRV_VERSION, sizeof(info->version)); 67 } 68 69 static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf) 70 { 71 switch(stringset) { 72 case ETH_SS_STATS: 73 memcpy(buf, ðtool_stats_keys, sizeof(ethtool_stats_keys)); 74 break; 75 } 76 } 77 78 static int veth_get_sset_count(struct net_device *dev, int sset) 79 { 80 switch (sset) { 81 case ETH_SS_STATS: 82 return ARRAY_SIZE(ethtool_stats_keys); 83 default: 84 return -EOPNOTSUPP; 85 } 86 } 87 88 static void veth_get_ethtool_stats(struct net_device *dev, 89 struct ethtool_stats *stats, u64 *data) 90 { 91 struct veth_priv *priv = netdev_priv(dev); 92 struct net_device *peer = rtnl_dereference(priv->peer); 93 94 data[0] = peer ? peer->ifindex : 0; 95 } 96 97 static const struct ethtool_ops veth_ethtool_ops = { 98 .get_settings = veth_get_settings, 99 .get_drvinfo = veth_get_drvinfo, 100 .get_link = ethtool_op_get_link, 101 .get_strings = veth_get_strings, 102 .get_sset_count = veth_get_sset_count, 103 .get_ethtool_stats = veth_get_ethtool_stats, 104 }; 105 106 static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) 107 { 108 struct veth_priv *priv = netdev_priv(dev); 109 struct net_device *rcv; 110 int length = skb->len; 111 112 rcu_read_lock(); 113 rcv = rcu_dereference(priv->peer); 114 if (unlikely(!rcv)) { 115 kfree_skb(skb); 116 goto drop; 117 } 118 119 if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { 120 struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); 121 122 u64_stats_update_begin(&stats->syncp); 123 stats->bytes += length; 124 stats->packets++; 125 u64_stats_update_end(&stats->syncp); 126 } else { 127 drop: 128 atomic64_inc(&priv->dropped); 129 } 130 rcu_read_unlock(); 131 return NETDEV_TX_OK; 132 } 133 134 /* 135 * general routines 136 */ 137 138 static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev) 139 { 140 struct veth_priv *priv = netdev_priv(dev); 141 int cpu; 142 143 result->packets = 0; 144 result->bytes = 0; 145 for_each_possible_cpu(cpu) { 146 struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu); 147 u64 packets, bytes; 148 unsigned int start; 149 150 do { 151 start = u64_stats_fetch_begin_irq(&stats->syncp); 152 packets = stats->packets; 153 bytes = stats->bytes; 154 } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); 155 result->packets += packets; 156 result->bytes += bytes; 157 } 158 return atomic64_read(&priv->dropped); 159 } 160 161 static void veth_get_stats64(struct net_device *dev, 162 struct rtnl_link_stats64 *tot) 163 { 164 struct veth_priv *priv = netdev_priv(dev); 165 struct net_device *peer; 166 struct pcpu_vstats one; 167 168 tot->tx_dropped = veth_stats_one(&one, dev); 169 tot->tx_bytes = one.bytes; 170 tot->tx_packets = one.packets; 171 172 rcu_read_lock(); 173 peer = rcu_dereference(priv->peer); 174 if (peer) { 175 tot->rx_dropped = veth_stats_one(&one, peer); 176 tot->rx_bytes = one.bytes; 177 tot->rx_packets = one.packets; 178 } 179 rcu_read_unlock(); 180 } 181 182 /* fake multicast ability */ 183 static void veth_set_multicast_list(struct net_device *dev) 184 { 185 } 186 187 static int veth_open(struct net_device *dev) 188 { 189 struct veth_priv *priv = netdev_priv(dev); 190 struct net_device *peer = rtnl_dereference(priv->peer); 191 192 if (!peer) 193 return -ENOTCONN; 194 195 if (peer->flags & IFF_UP) { 196 netif_carrier_on(dev); 197 netif_carrier_on(peer); 198 } 199 return 0; 200 } 201 202 static int veth_close(struct net_device *dev) 203 { 204 struct veth_priv *priv = netdev_priv(dev); 205 struct net_device *peer = rtnl_dereference(priv->peer); 206 207 netif_carrier_off(dev); 208 if (peer) 209 netif_carrier_off(peer); 210 211 return 0; 212 } 213 214 static int is_valid_veth_mtu(int mtu) 215 { 216 return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU; 217 } 218 219 static int veth_dev_init(struct net_device *dev) 220 { 221 dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats); 222 if (!dev->vstats) 223 return -ENOMEM; 224 return 0; 225 } 226 227 static void veth_dev_free(struct net_device *dev) 228 { 229 free_percpu(dev->vstats); 230 free_netdev(dev); 231 } 232 233 #ifdef CONFIG_NET_POLL_CONTROLLER 234 static void veth_poll_controller(struct net_device *dev) 235 { 236 /* veth only receives frames when its peer sends one 237 * Since it's a synchronous operation, we are guaranteed 238 * never to have pending data when we poll for it so 239 * there is nothing to do here. 240 * 241 * We need this though so netpoll recognizes us as an interface that 242 * supports polling, which enables bridge devices in virt setups to 243 * still use netconsole 244 */ 245 } 246 #endif /* CONFIG_NET_POLL_CONTROLLER */ 247 248 static int veth_get_iflink(const struct net_device *dev) 249 { 250 struct veth_priv *priv = netdev_priv(dev); 251 struct net_device *peer; 252 int iflink; 253 254 rcu_read_lock(); 255 peer = rcu_dereference(priv->peer); 256 iflink = peer ? peer->ifindex : 0; 257 rcu_read_unlock(); 258 259 return iflink; 260 } 261 262 static void veth_set_rx_headroom(struct net_device *dev, int new_hr) 263 { 264 struct veth_priv *peer_priv, *priv = netdev_priv(dev); 265 struct net_device *peer; 266 267 if (new_hr < 0) 268 new_hr = 0; 269 270 rcu_read_lock(); 271 peer = rcu_dereference(priv->peer); 272 if (unlikely(!peer)) 273 goto out; 274 275 peer_priv = netdev_priv(peer); 276 priv->requested_headroom = new_hr; 277 new_hr = max(priv->requested_headroom, peer_priv->requested_headroom); 278 dev->needed_headroom = new_hr; 279 peer->needed_headroom = new_hr; 280 281 out: 282 rcu_read_unlock(); 283 } 284 285 static const struct net_device_ops veth_netdev_ops = { 286 .ndo_init = veth_dev_init, 287 .ndo_open = veth_open, 288 .ndo_stop = veth_close, 289 .ndo_start_xmit = veth_xmit, 290 .ndo_get_stats64 = veth_get_stats64, 291 .ndo_set_rx_mode = veth_set_multicast_list, 292 .ndo_set_mac_address = eth_mac_addr, 293 #ifdef CONFIG_NET_POLL_CONTROLLER 294 .ndo_poll_controller = veth_poll_controller, 295 #endif 296 .ndo_get_iflink = veth_get_iflink, 297 .ndo_features_check = passthru_features_check, 298 .ndo_set_rx_headroom = veth_set_rx_headroom, 299 }; 300 301 #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ 302 NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \ 303 NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ 304 NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ 305 NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) 306 307 static void veth_setup(struct net_device *dev) 308 { 309 ether_setup(dev); 310 311 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 312 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 313 dev->priv_flags |= IFF_NO_QUEUE; 314 dev->priv_flags |= IFF_PHONY_HEADROOM; 315 316 dev->netdev_ops = &veth_netdev_ops; 317 dev->ethtool_ops = &veth_ethtool_ops; 318 dev->features |= NETIF_F_LLTX; 319 dev->features |= VETH_FEATURES; 320 dev->vlan_features = dev->features & 321 ~(NETIF_F_HW_VLAN_CTAG_TX | 322 NETIF_F_HW_VLAN_STAG_TX | 323 NETIF_F_HW_VLAN_CTAG_RX | 324 NETIF_F_HW_VLAN_STAG_RX); 325 dev->destructor = veth_dev_free; 326 dev->max_mtu = ETH_MAX_MTU; 327 328 dev->hw_features = VETH_FEATURES; 329 dev->hw_enc_features = VETH_FEATURES; 330 dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; 331 } 332 333 /* 334 * netlink interface 335 */ 336 337 static int veth_validate(struct nlattr *tb[], struct nlattr *data[]) 338 { 339 if (tb[IFLA_ADDRESS]) { 340 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 341 return -EINVAL; 342 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 343 return -EADDRNOTAVAIL; 344 } 345 if (tb[IFLA_MTU]) { 346 if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU]))) 347 return -EINVAL; 348 } 349 return 0; 350 } 351 352 static struct rtnl_link_ops veth_link_ops; 353 354 static int veth_newlink(struct net *src_net, struct net_device *dev, 355 struct nlattr *tb[], struct nlattr *data[]) 356 { 357 int err; 358 struct net_device *peer; 359 struct veth_priv *priv; 360 char ifname[IFNAMSIZ]; 361 struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; 362 unsigned char name_assign_type; 363 struct ifinfomsg *ifmp; 364 struct net *net; 365 366 /* 367 * create and register peer first 368 */ 369 if (data != NULL && data[VETH_INFO_PEER] != NULL) { 370 struct nlattr *nla_peer; 371 372 nla_peer = data[VETH_INFO_PEER]; 373 ifmp = nla_data(nla_peer); 374 err = rtnl_nla_parse_ifla(peer_tb, 375 nla_data(nla_peer) + sizeof(struct ifinfomsg), 376 nla_len(nla_peer) - sizeof(struct ifinfomsg)); 377 if (err < 0) 378 return err; 379 380 err = veth_validate(peer_tb, NULL); 381 if (err < 0) 382 return err; 383 384 tbp = peer_tb; 385 } else { 386 ifmp = NULL; 387 tbp = tb; 388 } 389 390 if (tbp[IFLA_IFNAME]) { 391 nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); 392 name_assign_type = NET_NAME_USER; 393 } else { 394 snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); 395 name_assign_type = NET_NAME_ENUM; 396 } 397 398 net = rtnl_link_get_net(src_net, tbp); 399 if (IS_ERR(net)) 400 return PTR_ERR(net); 401 402 peer = rtnl_create_link(net, ifname, name_assign_type, 403 &veth_link_ops, tbp); 404 if (IS_ERR(peer)) { 405 put_net(net); 406 return PTR_ERR(peer); 407 } 408 409 if (tbp[IFLA_ADDRESS] == NULL) 410 eth_hw_addr_random(peer); 411 412 if (ifmp && (dev->ifindex != 0)) 413 peer->ifindex = ifmp->ifi_index; 414 415 err = register_netdevice(peer); 416 put_net(net); 417 net = NULL; 418 if (err < 0) 419 goto err_register_peer; 420 421 netif_carrier_off(peer); 422 423 err = rtnl_configure_link(peer, ifmp); 424 if (err < 0) 425 goto err_configure_peer; 426 427 /* 428 * register dev last 429 * 430 * note, that since we've registered new device the dev's name 431 * should be re-allocated 432 */ 433 434 if (tb[IFLA_ADDRESS] == NULL) 435 eth_hw_addr_random(dev); 436 437 if (tb[IFLA_IFNAME]) 438 nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); 439 else 440 snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); 441 442 err = register_netdevice(dev); 443 if (err < 0) 444 goto err_register_dev; 445 446 netif_carrier_off(dev); 447 448 /* 449 * tie the deviced together 450 */ 451 452 priv = netdev_priv(dev); 453 rcu_assign_pointer(priv->peer, peer); 454 455 priv = netdev_priv(peer); 456 rcu_assign_pointer(priv->peer, dev); 457 return 0; 458 459 err_register_dev: 460 /* nothing to do */ 461 err_configure_peer: 462 unregister_netdevice(peer); 463 return err; 464 465 err_register_peer: 466 free_netdev(peer); 467 return err; 468 } 469 470 static void veth_dellink(struct net_device *dev, struct list_head *head) 471 { 472 struct veth_priv *priv; 473 struct net_device *peer; 474 475 priv = netdev_priv(dev); 476 peer = rtnl_dereference(priv->peer); 477 478 /* Note : dellink() is called from default_device_exit_batch(), 479 * before a rcu_synchronize() point. The devices are guaranteed 480 * not being freed before one RCU grace period. 481 */ 482 RCU_INIT_POINTER(priv->peer, NULL); 483 unregister_netdevice_queue(dev, head); 484 485 if (peer) { 486 priv = netdev_priv(peer); 487 RCU_INIT_POINTER(priv->peer, NULL); 488 unregister_netdevice_queue(peer, head); 489 } 490 } 491 492 static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = { 493 [VETH_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, 494 }; 495 496 static struct net *veth_get_link_net(const struct net_device *dev) 497 { 498 struct veth_priv *priv = netdev_priv(dev); 499 struct net_device *peer = rtnl_dereference(priv->peer); 500 501 return peer ? dev_net(peer) : dev_net(dev); 502 } 503 504 static struct rtnl_link_ops veth_link_ops = { 505 .kind = DRV_NAME, 506 .priv_size = sizeof(struct veth_priv), 507 .setup = veth_setup, 508 .validate = veth_validate, 509 .newlink = veth_newlink, 510 .dellink = veth_dellink, 511 .policy = veth_policy, 512 .maxtype = VETH_INFO_MAX, 513 .get_link_net = veth_get_link_net, 514 }; 515 516 /* 517 * init/fini 518 */ 519 520 static __init int veth_init(void) 521 { 522 return rtnl_link_register(&veth_link_ops); 523 } 524 525 static __exit void veth_exit(void) 526 { 527 rtnl_link_unregister(&veth_link_ops); 528 } 529 530 module_init(veth_init); 531 module_exit(veth_exit); 532 533 MODULE_DESCRIPTION("Virtual Ethernet Tunnel"); 534 MODULE_LICENSE("GPL v2"); 535 MODULE_ALIAS_RTNL_LINK(DRV_NAME); 536