1 /* 2 * drivers/net/veth.c 3 * 4 * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc 5 * 6 * Author: Pavel Emelianov <xemul@openvz.org> 7 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com> 8 * 9 */ 10 11 #include <linux/netdevice.h> 12 #include <linux/slab.h> 13 #include <linux/ethtool.h> 14 #include <linux/etherdevice.h> 15 #include <linux/u64_stats_sync.h> 16 17 #include <net/rtnetlink.h> 18 #include <net/dst.h> 19 #include <net/xfrm.h> 20 #include <linux/veth.h> 21 #include <linux/module.h> 22 23 #define DRV_NAME "veth" 24 #define DRV_VERSION "1.0" 25 26 struct pcpu_vstats { 27 u64 packets; 28 u64 bytes; 29 struct u64_stats_sync syncp; 30 }; 31 32 struct veth_priv { 33 struct net_device __rcu *peer; 34 atomic64_t dropped; 35 unsigned requested_headroom; 36 }; 37 38 /* 39 * ethtool interface 40 */ 41 42 static struct { 43 const char string[ETH_GSTRING_LEN]; 44 } ethtool_stats_keys[] = { 45 { "peer_ifindex" }, 46 }; 47 48 static int veth_get_link_ksettings(struct net_device *dev, 49 struct ethtool_link_ksettings *cmd) 50 { 51 cmd->base.speed = SPEED_10000; 52 cmd->base.duplex = DUPLEX_FULL; 53 cmd->base.port = PORT_TP; 54 cmd->base.autoneg = AUTONEG_DISABLE; 55 return 0; 56 } 57 58 static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 59 { 60 strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); 61 strlcpy(info->version, DRV_VERSION, sizeof(info->version)); 62 } 63 64 static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf) 65 { 66 switch(stringset) { 67 case ETH_SS_STATS: 68 memcpy(buf, ðtool_stats_keys, sizeof(ethtool_stats_keys)); 69 break; 70 } 71 } 72 73 static int veth_get_sset_count(struct net_device *dev, int sset) 74 { 75 switch (sset) { 76 case ETH_SS_STATS: 77 return ARRAY_SIZE(ethtool_stats_keys); 78 default: 79 return -EOPNOTSUPP; 80 } 81 } 82 83 static void veth_get_ethtool_stats(struct net_device *dev, 84 struct ethtool_stats *stats, u64 *data) 85 { 86 struct veth_priv *priv = netdev_priv(dev); 87 struct net_device *peer = rtnl_dereference(priv->peer); 88 89 data[0] = peer ? peer->ifindex : 0; 90 } 91 92 static const struct ethtool_ops veth_ethtool_ops = { 93 .get_drvinfo = veth_get_drvinfo, 94 .get_link = ethtool_op_get_link, 95 .get_strings = veth_get_strings, 96 .get_sset_count = veth_get_sset_count, 97 .get_ethtool_stats = veth_get_ethtool_stats, 98 .get_link_ksettings = veth_get_link_ksettings, 99 }; 100 101 static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) 102 { 103 struct veth_priv *priv = netdev_priv(dev); 104 struct net_device *rcv; 105 int length = skb->len; 106 107 rcu_read_lock(); 108 rcv = rcu_dereference(priv->peer); 109 if (unlikely(!rcv)) { 110 kfree_skb(skb); 111 goto drop; 112 } 113 114 if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { 115 struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); 116 117 u64_stats_update_begin(&stats->syncp); 118 stats->bytes += length; 119 stats->packets++; 120 u64_stats_update_end(&stats->syncp); 121 } else { 122 drop: 123 atomic64_inc(&priv->dropped); 124 } 125 rcu_read_unlock(); 126 return NETDEV_TX_OK; 127 } 128 129 /* 130 * general routines 131 */ 132 133 static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev) 134 { 135 struct veth_priv *priv = netdev_priv(dev); 136 int cpu; 137 138 result->packets = 0; 139 result->bytes = 0; 140 for_each_possible_cpu(cpu) { 141 struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu); 142 u64 packets, bytes; 143 unsigned int start; 144 145 do { 146 start = u64_stats_fetch_begin_irq(&stats->syncp); 147 packets = stats->packets; 148 bytes = stats->bytes; 149 } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); 150 result->packets += packets; 151 result->bytes += bytes; 152 } 153 return atomic64_read(&priv->dropped); 154 } 155 156 static void veth_get_stats64(struct net_device *dev, 157 struct rtnl_link_stats64 *tot) 158 { 159 struct veth_priv *priv = netdev_priv(dev); 160 struct net_device *peer; 161 struct pcpu_vstats one; 162 163 tot->tx_dropped = veth_stats_one(&one, dev); 164 tot->tx_bytes = one.bytes; 165 tot->tx_packets = one.packets; 166 167 rcu_read_lock(); 168 peer = rcu_dereference(priv->peer); 169 if (peer) { 170 tot->rx_dropped = veth_stats_one(&one, peer); 171 tot->rx_bytes = one.bytes; 172 tot->rx_packets = one.packets; 173 } 174 rcu_read_unlock(); 175 } 176 177 /* fake multicast ability */ 178 static void veth_set_multicast_list(struct net_device *dev) 179 { 180 } 181 182 static int veth_open(struct net_device *dev) 183 { 184 struct veth_priv *priv = netdev_priv(dev); 185 struct net_device *peer = rtnl_dereference(priv->peer); 186 187 if (!peer) 188 return -ENOTCONN; 189 190 if (peer->flags & IFF_UP) { 191 netif_carrier_on(dev); 192 netif_carrier_on(peer); 193 } 194 return 0; 195 } 196 197 static int veth_close(struct net_device *dev) 198 { 199 struct veth_priv *priv = netdev_priv(dev); 200 struct net_device *peer = rtnl_dereference(priv->peer); 201 202 netif_carrier_off(dev); 203 if (peer) 204 netif_carrier_off(peer); 205 206 return 0; 207 } 208 209 static int is_valid_veth_mtu(int mtu) 210 { 211 return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU; 212 } 213 214 static int veth_dev_init(struct net_device *dev) 215 { 216 dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats); 217 if (!dev->vstats) 218 return -ENOMEM; 219 return 0; 220 } 221 222 static void veth_dev_free(struct net_device *dev) 223 { 224 free_percpu(dev->vstats); 225 free_netdev(dev); 226 } 227 228 #ifdef CONFIG_NET_POLL_CONTROLLER 229 static void veth_poll_controller(struct net_device *dev) 230 { 231 /* veth only receives frames when its peer sends one 232 * Since it's a synchronous operation, we are guaranteed 233 * never to have pending data when we poll for it so 234 * there is nothing to do here. 235 * 236 * We need this though so netpoll recognizes us as an interface that 237 * supports polling, which enables bridge devices in virt setups to 238 * still use netconsole 239 */ 240 } 241 #endif /* CONFIG_NET_POLL_CONTROLLER */ 242 243 static int veth_get_iflink(const struct net_device *dev) 244 { 245 struct veth_priv *priv = netdev_priv(dev); 246 struct net_device *peer; 247 int iflink; 248 249 rcu_read_lock(); 250 peer = rcu_dereference(priv->peer); 251 iflink = peer ? peer->ifindex : 0; 252 rcu_read_unlock(); 253 254 return iflink; 255 } 256 257 static void veth_set_rx_headroom(struct net_device *dev, int new_hr) 258 { 259 struct veth_priv *peer_priv, *priv = netdev_priv(dev); 260 struct net_device *peer; 261 262 if (new_hr < 0) 263 new_hr = 0; 264 265 rcu_read_lock(); 266 peer = rcu_dereference(priv->peer); 267 if (unlikely(!peer)) 268 goto out; 269 270 peer_priv = netdev_priv(peer); 271 priv->requested_headroom = new_hr; 272 new_hr = max(priv->requested_headroom, peer_priv->requested_headroom); 273 dev->needed_headroom = new_hr; 274 peer->needed_headroom = new_hr; 275 276 out: 277 rcu_read_unlock(); 278 } 279 280 static const struct net_device_ops veth_netdev_ops = { 281 .ndo_init = veth_dev_init, 282 .ndo_open = veth_open, 283 .ndo_stop = veth_close, 284 .ndo_start_xmit = veth_xmit, 285 .ndo_get_stats64 = veth_get_stats64, 286 .ndo_set_rx_mode = veth_set_multicast_list, 287 .ndo_set_mac_address = eth_mac_addr, 288 #ifdef CONFIG_NET_POLL_CONTROLLER 289 .ndo_poll_controller = veth_poll_controller, 290 #endif 291 .ndo_get_iflink = veth_get_iflink, 292 .ndo_features_check = passthru_features_check, 293 .ndo_set_rx_headroom = veth_set_rx_headroom, 294 }; 295 296 #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ 297 NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \ 298 NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ 299 NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ 300 NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) 301 302 static void veth_setup(struct net_device *dev) 303 { 304 ether_setup(dev); 305 306 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 307 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 308 dev->priv_flags |= IFF_NO_QUEUE; 309 dev->priv_flags |= IFF_PHONY_HEADROOM; 310 311 dev->netdev_ops = &veth_netdev_ops; 312 dev->ethtool_ops = &veth_ethtool_ops; 313 dev->features |= NETIF_F_LLTX; 314 dev->features |= VETH_FEATURES; 315 dev->vlan_features = dev->features & 316 ~(NETIF_F_HW_VLAN_CTAG_TX | 317 NETIF_F_HW_VLAN_STAG_TX | 318 NETIF_F_HW_VLAN_CTAG_RX | 319 NETIF_F_HW_VLAN_STAG_RX); 320 dev->destructor = veth_dev_free; 321 dev->max_mtu = ETH_MAX_MTU; 322 323 dev->hw_features = VETH_FEATURES; 324 dev->hw_enc_features = VETH_FEATURES; 325 dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; 326 } 327 328 /* 329 * netlink interface 330 */ 331 332 static int veth_validate(struct nlattr *tb[], struct nlattr *data[]) 333 { 334 if (tb[IFLA_ADDRESS]) { 335 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 336 return -EINVAL; 337 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 338 return -EADDRNOTAVAIL; 339 } 340 if (tb[IFLA_MTU]) { 341 if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU]))) 342 return -EINVAL; 343 } 344 return 0; 345 } 346 347 static struct rtnl_link_ops veth_link_ops; 348 349 static int veth_newlink(struct net *src_net, struct net_device *dev, 350 struct nlattr *tb[], struct nlattr *data[]) 351 { 352 int err; 353 struct net_device *peer; 354 struct veth_priv *priv; 355 char ifname[IFNAMSIZ]; 356 struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; 357 unsigned char name_assign_type; 358 struct ifinfomsg *ifmp; 359 struct net *net; 360 361 /* 362 * create and register peer first 363 */ 364 if (data != NULL && data[VETH_INFO_PEER] != NULL) { 365 struct nlattr *nla_peer; 366 367 nla_peer = data[VETH_INFO_PEER]; 368 ifmp = nla_data(nla_peer); 369 err = rtnl_nla_parse_ifla(peer_tb, 370 nla_data(nla_peer) + sizeof(struct ifinfomsg), 371 nla_len(nla_peer) - sizeof(struct ifinfomsg), 372 NULL); 373 if (err < 0) 374 return err; 375 376 err = veth_validate(peer_tb, NULL); 377 if (err < 0) 378 return err; 379 380 tbp = peer_tb; 381 } else { 382 ifmp = NULL; 383 tbp = tb; 384 } 385 386 if (tbp[IFLA_IFNAME]) { 387 nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); 388 name_assign_type = NET_NAME_USER; 389 } else { 390 snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); 391 name_assign_type = NET_NAME_ENUM; 392 } 393 394 net = rtnl_link_get_net(src_net, tbp); 395 if (IS_ERR(net)) 396 return PTR_ERR(net); 397 398 peer = rtnl_create_link(net, ifname, name_assign_type, 399 &veth_link_ops, tbp); 400 if (IS_ERR(peer)) { 401 put_net(net); 402 return PTR_ERR(peer); 403 } 404 405 if (tbp[IFLA_ADDRESS] == NULL) 406 eth_hw_addr_random(peer); 407 408 if (ifmp && (dev->ifindex != 0)) 409 peer->ifindex = ifmp->ifi_index; 410 411 err = register_netdevice(peer); 412 put_net(net); 413 net = NULL; 414 if (err < 0) 415 goto err_register_peer; 416 417 netif_carrier_off(peer); 418 419 err = rtnl_configure_link(peer, ifmp); 420 if (err < 0) 421 goto err_configure_peer; 422 423 /* 424 * register dev last 425 * 426 * note, that since we've registered new device the dev's name 427 * should be re-allocated 428 */ 429 430 if (tb[IFLA_ADDRESS] == NULL) 431 eth_hw_addr_random(dev); 432 433 if (tb[IFLA_IFNAME]) 434 nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); 435 else 436 snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); 437 438 err = register_netdevice(dev); 439 if (err < 0) 440 goto err_register_dev; 441 442 netif_carrier_off(dev); 443 444 /* 445 * tie the deviced together 446 */ 447 448 priv = netdev_priv(dev); 449 rcu_assign_pointer(priv->peer, peer); 450 451 priv = netdev_priv(peer); 452 rcu_assign_pointer(priv->peer, dev); 453 return 0; 454 455 err_register_dev: 456 /* nothing to do */ 457 err_configure_peer: 458 unregister_netdevice(peer); 459 return err; 460 461 err_register_peer: 462 free_netdev(peer); 463 return err; 464 } 465 466 static void veth_dellink(struct net_device *dev, struct list_head *head) 467 { 468 struct veth_priv *priv; 469 struct net_device *peer; 470 471 priv = netdev_priv(dev); 472 peer = rtnl_dereference(priv->peer); 473 474 /* Note : dellink() is called from default_device_exit_batch(), 475 * before a rcu_synchronize() point. The devices are guaranteed 476 * not being freed before one RCU grace period. 477 */ 478 RCU_INIT_POINTER(priv->peer, NULL); 479 unregister_netdevice_queue(dev, head); 480 481 if (peer) { 482 priv = netdev_priv(peer); 483 RCU_INIT_POINTER(priv->peer, NULL); 484 unregister_netdevice_queue(peer, head); 485 } 486 } 487 488 static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = { 489 [VETH_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, 490 }; 491 492 static struct net *veth_get_link_net(const struct net_device *dev) 493 { 494 struct veth_priv *priv = netdev_priv(dev); 495 struct net_device *peer = rtnl_dereference(priv->peer); 496 497 return peer ? dev_net(peer) : dev_net(dev); 498 } 499 500 static struct rtnl_link_ops veth_link_ops = { 501 .kind = DRV_NAME, 502 .priv_size = sizeof(struct veth_priv), 503 .setup = veth_setup, 504 .validate = veth_validate, 505 .newlink = veth_newlink, 506 .dellink = veth_dellink, 507 .policy = veth_policy, 508 .maxtype = VETH_INFO_MAX, 509 .get_link_net = veth_get_link_net, 510 }; 511 512 /* 513 * init/fini 514 */ 515 516 static __init int veth_init(void) 517 { 518 return rtnl_link_register(&veth_link_ops); 519 } 520 521 static __exit void veth_exit(void) 522 { 523 rtnl_link_unregister(&veth_link_ops); 524 } 525 526 module_init(veth_init); 527 module_exit(veth_exit); 528 529 MODULE_DESCRIPTION("Virtual Ethernet Tunnel"); 530 MODULE_LICENSE("GPL v2"); 531 MODULE_ALIAS_RTNL_LINK(DRV_NAME); 532