1 /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> 2 * 3 * This program is free software; you can redistribute it and/or 4 * modify it under the terms of the GNU General Public License as 5 * published by the Free Software Foundation; either version 2 of 6 * the License, or (at your option) any later version. 7 * 8 */ 9 10 #include "ipvlan.h" 11 12 static unsigned int ipvlan_netid __read_mostly; 13 14 struct ipvlan_netns { 15 unsigned int ipvl_nf_hook_refcnt; 16 }; 17 18 static const struct nf_hook_ops ipvl_nfops[] = { 19 { 20 .hook = ipvlan_nf_input, 21 .pf = NFPROTO_IPV4, 22 .hooknum = NF_INET_LOCAL_IN, 23 .priority = INT_MAX, 24 }, 25 #if IS_ENABLED(CONFIG_IPV6) 26 { 27 .hook = ipvlan_nf_input, 28 .pf = NFPROTO_IPV6, 29 .hooknum = NF_INET_LOCAL_IN, 30 .priority = INT_MAX, 31 }, 32 #endif 33 }; 34 35 static const struct l3mdev_ops ipvl_l3mdev_ops = { 36 .l3mdev_l3_rcv = ipvlan_l3_rcv, 37 }; 38 39 static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) 40 { 41 ipvlan->dev->mtu = dev->mtu; 42 } 43 44 static int ipvlan_register_nf_hook(struct net *net) 45 { 46 struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); 47 int err = 0; 48 49 if (!vnet->ipvl_nf_hook_refcnt) { 50 err = nf_register_net_hooks(net, ipvl_nfops, 51 ARRAY_SIZE(ipvl_nfops)); 52 if (!err) 53 vnet->ipvl_nf_hook_refcnt = 1; 54 } else { 55 vnet->ipvl_nf_hook_refcnt++; 56 } 57 58 return err; 59 } 60 61 static void ipvlan_unregister_nf_hook(struct net *net) 62 { 63 struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); 64 65 if (WARN_ON(!vnet->ipvl_nf_hook_refcnt)) 66 return; 67 68 vnet->ipvl_nf_hook_refcnt--; 69 if (!vnet->ipvl_nf_hook_refcnt) 70 nf_unregister_net_hooks(net, ipvl_nfops, 71 ARRAY_SIZE(ipvl_nfops)); 72 } 73 74 static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) 75 { 76 struct ipvl_dev *ipvlan; 77 struct net_device *mdev = port->dev; 78 unsigned int flags; 79 int err; 80 81 ASSERT_RTNL(); 82 if (port->mode != nval) { 83 list_for_each_entry(ipvlan, &port->ipvlans, pnode) { 84 flags = ipvlan->dev->flags; 85 if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) { 86 err = dev_change_flags(ipvlan->dev, 87 flags | IFF_NOARP); 88 } else { 89 err = dev_change_flags(ipvlan->dev, 90 flags & ~IFF_NOARP); 91 } 92 if (unlikely(err)) 93 goto fail; 94 } 95 if (nval == IPVLAN_MODE_L3S) { 96 /* New mode is L3S */ 97 err = ipvlan_register_nf_hook(read_pnet(&port->pnet)); 98 if (!err) { 99 mdev->l3mdev_ops = &ipvl_l3mdev_ops; 100 mdev->priv_flags |= IFF_L3MDEV_MASTER; 101 } else 102 goto fail; 103 } else if (port->mode == IPVLAN_MODE_L3S) { 104 /* Old mode was L3S */ 105 mdev->priv_flags &= ~IFF_L3MDEV_MASTER; 106 ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); 107 mdev->l3mdev_ops = NULL; 108 } 109 port->mode = nval; 110 } 111 return 0; 112 113 fail: 114 /* Undo the flags changes that have been done so far. */ 115 list_for_each_entry_continue_reverse(ipvlan, &port->ipvlans, pnode) { 116 flags = ipvlan->dev->flags; 117 if (port->mode == IPVLAN_MODE_L3 || 118 port->mode == IPVLAN_MODE_L3S) 119 dev_change_flags(ipvlan->dev, flags | IFF_NOARP); 120 else 121 dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP); 122 } 123 124 return err; 125 } 126 127 static int ipvlan_port_create(struct net_device *dev) 128 { 129 struct ipvl_port *port; 130 int err, idx; 131 132 port = kzalloc(sizeof(struct ipvl_port), GFP_KERNEL); 133 if (!port) 134 return -ENOMEM; 135 136 write_pnet(&port->pnet, dev_net(dev)); 137 port->dev = dev; 138 port->mode = IPVLAN_MODE_L3; 139 INIT_LIST_HEAD(&port->ipvlans); 140 for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++) 141 INIT_HLIST_HEAD(&port->hlhead[idx]); 142 143 skb_queue_head_init(&port->backlog); 144 INIT_WORK(&port->wq, ipvlan_process_multicast); 145 ida_init(&port->ida); 146 port->dev_id_start = 1; 147 148 err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port); 149 if (err) 150 goto err; 151 152 return 0; 153 154 err: 155 kfree(port); 156 return err; 157 } 158 159 static void ipvlan_port_destroy(struct net_device *dev) 160 { 161 struct ipvl_port *port = ipvlan_port_get_rtnl(dev); 162 struct sk_buff *skb; 163 164 if (port->mode == IPVLAN_MODE_L3S) { 165 dev->priv_flags &= ~IFF_L3MDEV_MASTER; 166 ipvlan_unregister_nf_hook(dev_net(dev)); 167 dev->l3mdev_ops = NULL; 168 } 169 netdev_rx_handler_unregister(dev); 170 cancel_work_sync(&port->wq); 171 while ((skb = __skb_dequeue(&port->backlog)) != NULL) { 172 if (skb->dev) 173 dev_put(skb->dev); 174 kfree_skb(skb); 175 } 176 ida_destroy(&port->ida); 177 kfree(port); 178 } 179 180 #define IPVLAN_FEATURES \ 181 (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ 182 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_GSO_ROBUST | \ 183 NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ 184 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) 185 186 #define IPVLAN_STATE_MASK \ 187 ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) 188 189 static int ipvlan_init(struct net_device *dev) 190 { 191 struct ipvl_dev *ipvlan = netdev_priv(dev); 192 struct net_device *phy_dev = ipvlan->phy_dev; 193 struct ipvl_port *port; 194 int err; 195 196 dev->state = (dev->state & ~IPVLAN_STATE_MASK) | 197 (phy_dev->state & IPVLAN_STATE_MASK); 198 dev->features = phy_dev->features & IPVLAN_FEATURES; 199 dev->features |= NETIF_F_LLTX | NETIF_F_VLAN_CHALLENGED; 200 dev->gso_max_size = phy_dev->gso_max_size; 201 dev->gso_max_segs = phy_dev->gso_max_segs; 202 dev->hard_header_len = phy_dev->hard_header_len; 203 204 netdev_lockdep_set_classes(dev); 205 206 ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats); 207 if (!ipvlan->pcpu_stats) 208 return -ENOMEM; 209 210 if (!netif_is_ipvlan_port(phy_dev)) { 211 err = ipvlan_port_create(phy_dev); 212 if (err < 0) { 213 free_percpu(ipvlan->pcpu_stats); 214 return err; 215 } 216 } 217 port = ipvlan_port_get_rtnl(phy_dev); 218 port->count += 1; 219 return 0; 220 } 221 222 static void ipvlan_uninit(struct net_device *dev) 223 { 224 struct ipvl_dev *ipvlan = netdev_priv(dev); 225 struct net_device *phy_dev = ipvlan->phy_dev; 226 struct ipvl_port *port; 227 228 free_percpu(ipvlan->pcpu_stats); 229 230 port = ipvlan_port_get_rtnl(phy_dev); 231 port->count -= 1; 232 if (!port->count) 233 ipvlan_port_destroy(port->dev); 234 } 235 236 static int ipvlan_open(struct net_device *dev) 237 { 238 struct ipvl_dev *ipvlan = netdev_priv(dev); 239 struct net_device *phy_dev = ipvlan->phy_dev; 240 struct ipvl_addr *addr; 241 242 if (ipvlan->port->mode == IPVLAN_MODE_L3 || 243 ipvlan->port->mode == IPVLAN_MODE_L3S) 244 dev->flags |= IFF_NOARP; 245 else 246 dev->flags &= ~IFF_NOARP; 247 248 rcu_read_lock(); 249 list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) 250 ipvlan_ht_addr_add(ipvlan, addr); 251 rcu_read_unlock(); 252 253 return dev_uc_add(phy_dev, phy_dev->dev_addr); 254 } 255 256 static int ipvlan_stop(struct net_device *dev) 257 { 258 struct ipvl_dev *ipvlan = netdev_priv(dev); 259 struct net_device *phy_dev = ipvlan->phy_dev; 260 struct ipvl_addr *addr; 261 262 dev_uc_unsync(phy_dev, dev); 263 dev_mc_unsync(phy_dev, dev); 264 265 dev_uc_del(phy_dev, phy_dev->dev_addr); 266 267 rcu_read_lock(); 268 list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) 269 ipvlan_ht_addr_del(addr); 270 rcu_read_unlock(); 271 272 return 0; 273 } 274 275 static netdev_tx_t ipvlan_start_xmit(struct sk_buff *skb, 276 struct net_device *dev) 277 { 278 const struct ipvl_dev *ipvlan = netdev_priv(dev); 279 int skblen = skb->len; 280 int ret; 281 282 ret = ipvlan_queue_xmit(skb, dev); 283 if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { 284 struct ipvl_pcpu_stats *pcptr; 285 286 pcptr = this_cpu_ptr(ipvlan->pcpu_stats); 287 288 u64_stats_update_begin(&pcptr->syncp); 289 pcptr->tx_pkts++; 290 pcptr->tx_bytes += skblen; 291 u64_stats_update_end(&pcptr->syncp); 292 } else { 293 this_cpu_inc(ipvlan->pcpu_stats->tx_drps); 294 } 295 return ret; 296 } 297 298 static netdev_features_t ipvlan_fix_features(struct net_device *dev, 299 netdev_features_t features) 300 { 301 struct ipvl_dev *ipvlan = netdev_priv(dev); 302 303 return features & (ipvlan->sfeatures | ~IPVLAN_FEATURES); 304 } 305 306 static void ipvlan_change_rx_flags(struct net_device *dev, int change) 307 { 308 struct ipvl_dev *ipvlan = netdev_priv(dev); 309 struct net_device *phy_dev = ipvlan->phy_dev; 310 311 if (change & IFF_ALLMULTI) 312 dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1); 313 } 314 315 static void ipvlan_set_multicast_mac_filter(struct net_device *dev) 316 { 317 struct ipvl_dev *ipvlan = netdev_priv(dev); 318 319 if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { 320 bitmap_fill(ipvlan->mac_filters, IPVLAN_MAC_FILTER_SIZE); 321 } else { 322 struct netdev_hw_addr *ha; 323 DECLARE_BITMAP(mc_filters, IPVLAN_MAC_FILTER_SIZE); 324 325 bitmap_zero(mc_filters, IPVLAN_MAC_FILTER_SIZE); 326 netdev_for_each_mc_addr(ha, dev) 327 __set_bit(ipvlan_mac_hash(ha->addr), mc_filters); 328 329 /* Turn-on broadcast bit irrespective of address family, 330 * since broadcast is deferred to a work-queue, hence no 331 * impact on fast-path processing. 332 */ 333 __set_bit(ipvlan_mac_hash(dev->broadcast), mc_filters); 334 335 bitmap_copy(ipvlan->mac_filters, mc_filters, 336 IPVLAN_MAC_FILTER_SIZE); 337 } 338 dev_uc_sync(ipvlan->phy_dev, dev); 339 dev_mc_sync(ipvlan->phy_dev, dev); 340 } 341 342 static void ipvlan_get_stats64(struct net_device *dev, 343 struct rtnl_link_stats64 *s) 344 { 345 struct ipvl_dev *ipvlan = netdev_priv(dev); 346 347 if (ipvlan->pcpu_stats) { 348 struct ipvl_pcpu_stats *pcptr; 349 u64 rx_pkts, rx_bytes, rx_mcast, tx_pkts, tx_bytes; 350 u32 rx_errs = 0, tx_drps = 0; 351 u32 strt; 352 int idx; 353 354 for_each_possible_cpu(idx) { 355 pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx); 356 do { 357 strt= u64_stats_fetch_begin_irq(&pcptr->syncp); 358 rx_pkts = pcptr->rx_pkts; 359 rx_bytes = pcptr->rx_bytes; 360 rx_mcast = pcptr->rx_mcast; 361 tx_pkts = pcptr->tx_pkts; 362 tx_bytes = pcptr->tx_bytes; 363 } while (u64_stats_fetch_retry_irq(&pcptr->syncp, 364 strt)); 365 366 s->rx_packets += rx_pkts; 367 s->rx_bytes += rx_bytes; 368 s->multicast += rx_mcast; 369 s->tx_packets += tx_pkts; 370 s->tx_bytes += tx_bytes; 371 372 /* u32 values are updated without syncp protection. */ 373 rx_errs += pcptr->rx_errs; 374 tx_drps += pcptr->tx_drps; 375 } 376 s->rx_errors = rx_errs; 377 s->rx_dropped = rx_errs; 378 s->tx_dropped = tx_drps; 379 } 380 } 381 382 static int ipvlan_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) 383 { 384 struct ipvl_dev *ipvlan = netdev_priv(dev); 385 struct net_device *phy_dev = ipvlan->phy_dev; 386 387 return vlan_vid_add(phy_dev, proto, vid); 388 } 389 390 static int ipvlan_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, 391 u16 vid) 392 { 393 struct ipvl_dev *ipvlan = netdev_priv(dev); 394 struct net_device *phy_dev = ipvlan->phy_dev; 395 396 vlan_vid_del(phy_dev, proto, vid); 397 return 0; 398 } 399 400 static int ipvlan_get_iflink(const struct net_device *dev) 401 { 402 struct ipvl_dev *ipvlan = netdev_priv(dev); 403 404 return ipvlan->phy_dev->ifindex; 405 } 406 407 static const struct net_device_ops ipvlan_netdev_ops = { 408 .ndo_init = ipvlan_init, 409 .ndo_uninit = ipvlan_uninit, 410 .ndo_open = ipvlan_open, 411 .ndo_stop = ipvlan_stop, 412 .ndo_start_xmit = ipvlan_start_xmit, 413 .ndo_fix_features = ipvlan_fix_features, 414 .ndo_change_rx_flags = ipvlan_change_rx_flags, 415 .ndo_set_rx_mode = ipvlan_set_multicast_mac_filter, 416 .ndo_get_stats64 = ipvlan_get_stats64, 417 .ndo_vlan_rx_add_vid = ipvlan_vlan_rx_add_vid, 418 .ndo_vlan_rx_kill_vid = ipvlan_vlan_rx_kill_vid, 419 .ndo_get_iflink = ipvlan_get_iflink, 420 }; 421 422 static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev, 423 unsigned short type, const void *daddr, 424 const void *saddr, unsigned len) 425 { 426 const struct ipvl_dev *ipvlan = netdev_priv(dev); 427 struct net_device *phy_dev = ipvlan->phy_dev; 428 429 /* TODO Probably use a different field than dev_addr so that the 430 * mac-address on the virtual device is portable and can be carried 431 * while the packets use the mac-addr on the physical device. 432 */ 433 return dev_hard_header(skb, phy_dev, type, daddr, 434 saddr ? : phy_dev->dev_addr, len); 435 } 436 437 static const struct header_ops ipvlan_header_ops = { 438 .create = ipvlan_hard_header, 439 .parse = eth_header_parse, 440 .cache = eth_header_cache, 441 .cache_update = eth_header_cache_update, 442 }; 443 444 static bool netif_is_ipvlan(const struct net_device *dev) 445 { 446 /* both ipvlan and ipvtap devices use the same netdev_ops */ 447 return dev->netdev_ops == &ipvlan_netdev_ops; 448 } 449 450 static int ipvlan_ethtool_get_link_ksettings(struct net_device *dev, 451 struct ethtool_link_ksettings *cmd) 452 { 453 const struct ipvl_dev *ipvlan = netdev_priv(dev); 454 455 return __ethtool_get_link_ksettings(ipvlan->phy_dev, cmd); 456 } 457 458 static void ipvlan_ethtool_get_drvinfo(struct net_device *dev, 459 struct ethtool_drvinfo *drvinfo) 460 { 461 strlcpy(drvinfo->driver, IPVLAN_DRV, sizeof(drvinfo->driver)); 462 strlcpy(drvinfo->version, IPV_DRV_VER, sizeof(drvinfo->version)); 463 } 464 465 static u32 ipvlan_ethtool_get_msglevel(struct net_device *dev) 466 { 467 const struct ipvl_dev *ipvlan = netdev_priv(dev); 468 469 return ipvlan->msg_enable; 470 } 471 472 static void ipvlan_ethtool_set_msglevel(struct net_device *dev, u32 value) 473 { 474 struct ipvl_dev *ipvlan = netdev_priv(dev); 475 476 ipvlan->msg_enable = value; 477 } 478 479 static const struct ethtool_ops ipvlan_ethtool_ops = { 480 .get_link = ethtool_op_get_link, 481 .get_link_ksettings = ipvlan_ethtool_get_link_ksettings, 482 .get_drvinfo = ipvlan_ethtool_get_drvinfo, 483 .get_msglevel = ipvlan_ethtool_get_msglevel, 484 .set_msglevel = ipvlan_ethtool_set_msglevel, 485 }; 486 487 static int ipvlan_nl_changelink(struct net_device *dev, 488 struct nlattr *tb[], struct nlattr *data[], 489 struct netlink_ext_ack *extack) 490 { 491 struct ipvl_dev *ipvlan = netdev_priv(dev); 492 struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); 493 int err = 0; 494 495 if (!data) 496 return 0; 497 498 if (data[IFLA_IPVLAN_MODE]) { 499 u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); 500 501 err = ipvlan_set_port_mode(port, nmode); 502 } 503 504 if (!err && data[IFLA_IPVLAN_FLAGS]) { 505 u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]); 506 507 if (flags & IPVLAN_F_PRIVATE) 508 ipvlan_mark_private(port); 509 else 510 ipvlan_clear_private(port); 511 512 if (flags & IPVLAN_F_VEPA) 513 ipvlan_mark_vepa(port); 514 else 515 ipvlan_clear_vepa(port); 516 } 517 518 return err; 519 } 520 521 static size_t ipvlan_nl_getsize(const struct net_device *dev) 522 { 523 return (0 524 + nla_total_size(2) /* IFLA_IPVLAN_MODE */ 525 + nla_total_size(2) /* IFLA_IPVLAN_FLAGS */ 526 ); 527 } 528 529 static int ipvlan_nl_validate(struct nlattr *tb[], struct nlattr *data[], 530 struct netlink_ext_ack *extack) 531 { 532 if (!data) 533 return 0; 534 535 if (data[IFLA_IPVLAN_MODE]) { 536 u16 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); 537 538 if (mode < IPVLAN_MODE_L2 || mode >= IPVLAN_MODE_MAX) 539 return -EINVAL; 540 } 541 if (data[IFLA_IPVLAN_FLAGS]) { 542 u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]); 543 544 /* Only two bits are used at this moment. */ 545 if (flags & ~(IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) 546 return -EINVAL; 547 /* Also both flags can't be active at the same time. */ 548 if ((flags & (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) == 549 (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) 550 return -EINVAL; 551 } 552 553 return 0; 554 } 555 556 static int ipvlan_nl_fillinfo(struct sk_buff *skb, 557 const struct net_device *dev) 558 { 559 struct ipvl_dev *ipvlan = netdev_priv(dev); 560 struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); 561 int ret = -EINVAL; 562 563 if (!port) 564 goto err; 565 566 ret = -EMSGSIZE; 567 if (nla_put_u16(skb, IFLA_IPVLAN_MODE, port->mode)) 568 goto err; 569 if (nla_put_u16(skb, IFLA_IPVLAN_FLAGS, port->flags)) 570 goto err; 571 572 return 0; 573 574 err: 575 return ret; 576 } 577 578 int ipvlan_link_new(struct net *src_net, struct net_device *dev, 579 struct nlattr *tb[], struct nlattr *data[], 580 struct netlink_ext_ack *extack) 581 { 582 struct ipvl_dev *ipvlan = netdev_priv(dev); 583 struct ipvl_port *port; 584 struct net_device *phy_dev; 585 int err; 586 u16 mode = IPVLAN_MODE_L3; 587 588 if (!tb[IFLA_LINK]) 589 return -EINVAL; 590 591 phy_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); 592 if (!phy_dev) 593 return -ENODEV; 594 595 if (netif_is_ipvlan(phy_dev)) { 596 struct ipvl_dev *tmp = netdev_priv(phy_dev); 597 598 phy_dev = tmp->phy_dev; 599 } else if (!netif_is_ipvlan_port(phy_dev)) { 600 /* Exit early if the underlying link is invalid or busy */ 601 if (phy_dev->type != ARPHRD_ETHER || 602 phy_dev->flags & IFF_LOOPBACK) { 603 netdev_err(phy_dev, 604 "Master is either lo or non-ether device\n"); 605 return -EINVAL; 606 } 607 608 if (netdev_is_rx_handler_busy(phy_dev)) { 609 netdev_err(phy_dev, "Device is already in use.\n"); 610 return -EBUSY; 611 } 612 } 613 614 ipvlan->phy_dev = phy_dev; 615 ipvlan->dev = dev; 616 ipvlan->sfeatures = IPVLAN_FEATURES; 617 if (!tb[IFLA_MTU]) 618 ipvlan_adjust_mtu(ipvlan, phy_dev); 619 INIT_LIST_HEAD(&ipvlan->addrs); 620 spin_lock_init(&ipvlan->addrs_lock); 621 622 /* TODO Probably put random address here to be presented to the 623 * world but keep using the physical-dev address for the outgoing 624 * packets. 625 */ 626 memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN); 627 628 dev->priv_flags |= IFF_NO_RX_HANDLER; 629 630 err = register_netdevice(dev); 631 if (err < 0) 632 return err; 633 634 /* ipvlan_init() would have created the port, if required */ 635 port = ipvlan_port_get_rtnl(phy_dev); 636 ipvlan->port = port; 637 638 /* If the port-id base is at the MAX value, then wrap it around and 639 * begin from 0x1 again. This may be due to a busy system where lots 640 * of slaves are getting created and deleted. 641 */ 642 if (port->dev_id_start == 0xFFFE) 643 port->dev_id_start = 0x1; 644 645 /* Since L2 address is shared among all IPvlan slaves including 646 * master, use unique 16 bit dev-ids to diffentiate among them. 647 * Assign IDs between 0x1 and 0xFFFE (used by the master) to each 648 * slave link [see addrconf_ifid_eui48()]. 649 */ 650 err = ida_simple_get(&port->ida, port->dev_id_start, 0xFFFE, 651 GFP_KERNEL); 652 if (err < 0) 653 err = ida_simple_get(&port->ida, 0x1, port->dev_id_start, 654 GFP_KERNEL); 655 if (err < 0) 656 goto unregister_netdev; 657 dev->dev_id = err; 658 659 /* Increment id-base to the next slot for the future assignment */ 660 port->dev_id_start = err + 1; 661 662 err = netdev_upper_dev_link(phy_dev, dev, extack); 663 if (err) 664 goto remove_ida; 665 666 /* Flags are per port and latest update overrides. User has 667 * to be consistent in setting it just like the mode attribute. 668 */ 669 if (data && data[IFLA_IPVLAN_FLAGS]) 670 port->flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]); 671 672 if (data && data[IFLA_IPVLAN_MODE]) 673 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); 674 675 err = ipvlan_set_port_mode(port, mode); 676 if (err) 677 goto unlink_netdev; 678 679 list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); 680 netif_stacked_transfer_operstate(phy_dev, dev); 681 return 0; 682 683 unlink_netdev: 684 netdev_upper_dev_unlink(phy_dev, dev); 685 remove_ida: 686 ida_simple_remove(&port->ida, dev->dev_id); 687 unregister_netdev: 688 unregister_netdevice(dev); 689 return err; 690 } 691 EXPORT_SYMBOL_GPL(ipvlan_link_new); 692 693 void ipvlan_link_delete(struct net_device *dev, struct list_head *head) 694 { 695 struct ipvl_dev *ipvlan = netdev_priv(dev); 696 struct ipvl_addr *addr, *next; 697 698 spin_lock_bh(&ipvlan->addrs_lock); 699 list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { 700 ipvlan_ht_addr_del(addr); 701 list_del_rcu(&addr->anode); 702 kfree_rcu(addr, rcu); 703 } 704 spin_unlock_bh(&ipvlan->addrs_lock); 705 706 ida_simple_remove(&ipvlan->port->ida, dev->dev_id); 707 list_del_rcu(&ipvlan->pnode); 708 unregister_netdevice_queue(dev, head); 709 netdev_upper_dev_unlink(ipvlan->phy_dev, dev); 710 } 711 EXPORT_SYMBOL_GPL(ipvlan_link_delete); 712 713 void ipvlan_link_setup(struct net_device *dev) 714 { 715 ether_setup(dev); 716 717 dev->max_mtu = ETH_MAX_MTU; 718 dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); 719 dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE; 720 dev->netdev_ops = &ipvlan_netdev_ops; 721 dev->needs_free_netdev = true; 722 dev->header_ops = &ipvlan_header_ops; 723 dev->ethtool_ops = &ipvlan_ethtool_ops; 724 } 725 EXPORT_SYMBOL_GPL(ipvlan_link_setup); 726 727 static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] = 728 { 729 [IFLA_IPVLAN_MODE] = { .type = NLA_U16 }, 730 [IFLA_IPVLAN_FLAGS] = { .type = NLA_U16 }, 731 }; 732 733 static struct rtnl_link_ops ipvlan_link_ops = { 734 .kind = "ipvlan", 735 .priv_size = sizeof(struct ipvl_dev), 736 737 .setup = ipvlan_link_setup, 738 .newlink = ipvlan_link_new, 739 .dellink = ipvlan_link_delete, 740 }; 741 742 int ipvlan_link_register(struct rtnl_link_ops *ops) 743 { 744 ops->get_size = ipvlan_nl_getsize; 745 ops->policy = ipvlan_nl_policy; 746 ops->validate = ipvlan_nl_validate; 747 ops->fill_info = ipvlan_nl_fillinfo; 748 ops->changelink = ipvlan_nl_changelink; 749 ops->maxtype = IFLA_IPVLAN_MAX; 750 return rtnl_link_register(ops); 751 } 752 EXPORT_SYMBOL_GPL(ipvlan_link_register); 753 754 static int ipvlan_device_event(struct notifier_block *unused, 755 unsigned long event, void *ptr) 756 { 757 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 758 struct ipvl_dev *ipvlan, *next; 759 struct ipvl_port *port; 760 LIST_HEAD(lst_kill); 761 762 if (!netif_is_ipvlan_port(dev)) 763 return NOTIFY_DONE; 764 765 port = ipvlan_port_get_rtnl(dev); 766 767 switch (event) { 768 case NETDEV_CHANGE: 769 list_for_each_entry(ipvlan, &port->ipvlans, pnode) 770 netif_stacked_transfer_operstate(ipvlan->phy_dev, 771 ipvlan->dev); 772 break; 773 774 case NETDEV_REGISTER: { 775 struct net *oldnet, *newnet = dev_net(dev); 776 struct ipvlan_netns *old_vnet; 777 778 oldnet = read_pnet(&port->pnet); 779 if (net_eq(newnet, oldnet)) 780 break; 781 782 write_pnet(&port->pnet, newnet); 783 784 old_vnet = net_generic(oldnet, ipvlan_netid); 785 if (!old_vnet->ipvl_nf_hook_refcnt) 786 break; 787 788 ipvlan_register_nf_hook(newnet); 789 ipvlan_unregister_nf_hook(oldnet); 790 break; 791 } 792 case NETDEV_UNREGISTER: 793 if (dev->reg_state != NETREG_UNREGISTERING) 794 break; 795 796 list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode) 797 ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev, 798 &lst_kill); 799 unregister_netdevice_many(&lst_kill); 800 break; 801 802 case NETDEV_FEAT_CHANGE: 803 list_for_each_entry(ipvlan, &port->ipvlans, pnode) { 804 ipvlan->dev->features = dev->features & IPVLAN_FEATURES; 805 ipvlan->dev->gso_max_size = dev->gso_max_size; 806 ipvlan->dev->gso_max_segs = dev->gso_max_segs; 807 netdev_features_change(ipvlan->dev); 808 } 809 break; 810 811 case NETDEV_CHANGEMTU: 812 list_for_each_entry(ipvlan, &port->ipvlans, pnode) 813 ipvlan_adjust_mtu(ipvlan, dev); 814 break; 815 816 case NETDEV_CHANGEADDR: 817 list_for_each_entry(ipvlan, &port->ipvlans, pnode) { 818 ether_addr_copy(ipvlan->dev->dev_addr, dev->dev_addr); 819 call_netdevice_notifiers(NETDEV_CHANGEADDR, ipvlan->dev); 820 } 821 break; 822 823 case NETDEV_PRE_TYPE_CHANGE: 824 /* Forbid underlying device to change its type. */ 825 return NOTIFY_BAD; 826 } 827 return NOTIFY_DONE; 828 } 829 830 /* the caller must held the addrs lock */ 831 static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) 832 { 833 struct ipvl_addr *addr; 834 835 addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC); 836 if (!addr) 837 return -ENOMEM; 838 839 addr->master = ipvlan; 840 if (!is_v6) { 841 memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr)); 842 addr->atype = IPVL_IPV4; 843 #if IS_ENABLED(CONFIG_IPV6) 844 } else { 845 memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr)); 846 addr->atype = IPVL_IPV6; 847 #endif 848 } 849 850 list_add_tail_rcu(&addr->anode, &ipvlan->addrs); 851 852 /* If the interface is not up, the address will be added to the hash 853 * list by ipvlan_open. 854 */ 855 if (netif_running(ipvlan->dev)) 856 ipvlan_ht_addr_add(ipvlan, addr); 857 858 return 0; 859 } 860 861 static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) 862 { 863 struct ipvl_addr *addr; 864 865 spin_lock_bh(&ipvlan->addrs_lock); 866 addr = ipvlan_find_addr(ipvlan, iaddr, is_v6); 867 if (!addr) { 868 spin_unlock_bh(&ipvlan->addrs_lock); 869 return; 870 } 871 872 ipvlan_ht_addr_del(addr); 873 list_del_rcu(&addr->anode); 874 spin_unlock_bh(&ipvlan->addrs_lock); 875 kfree_rcu(addr, rcu); 876 } 877 878 static bool ipvlan_is_valid_dev(const struct net_device *dev) 879 { 880 struct ipvl_dev *ipvlan = netdev_priv(dev); 881 882 if (!netif_is_ipvlan(dev)) 883 return false; 884 885 if (!ipvlan || !ipvlan->port) 886 return false; 887 888 return true; 889 } 890 891 #if IS_ENABLED(CONFIG_IPV6) 892 static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) 893 { 894 int ret = -EINVAL; 895 896 spin_lock_bh(&ipvlan->addrs_lock); 897 if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) 898 netif_err(ipvlan, ifup, ipvlan->dev, 899 "Failed to add IPv6=%pI6c addr for %s intf\n", 900 ip6_addr, ipvlan->dev->name); 901 else 902 ret = ipvlan_add_addr(ipvlan, ip6_addr, true); 903 spin_unlock_bh(&ipvlan->addrs_lock); 904 return ret; 905 } 906 907 static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) 908 { 909 return ipvlan_del_addr(ipvlan, ip6_addr, true); 910 } 911 912 static int ipvlan_addr6_event(struct notifier_block *unused, 913 unsigned long event, void *ptr) 914 { 915 struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr; 916 struct net_device *dev = (struct net_device *)if6->idev->dev; 917 struct ipvl_dev *ipvlan = netdev_priv(dev); 918 919 if (!ipvlan_is_valid_dev(dev)) 920 return NOTIFY_DONE; 921 922 switch (event) { 923 case NETDEV_UP: 924 if (ipvlan_add_addr6(ipvlan, &if6->addr)) 925 return NOTIFY_BAD; 926 break; 927 928 case NETDEV_DOWN: 929 ipvlan_del_addr6(ipvlan, &if6->addr); 930 break; 931 } 932 933 return NOTIFY_OK; 934 } 935 936 static int ipvlan_addr6_validator_event(struct notifier_block *unused, 937 unsigned long event, void *ptr) 938 { 939 struct in6_validator_info *i6vi = (struct in6_validator_info *)ptr; 940 struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev; 941 struct ipvl_dev *ipvlan = netdev_priv(dev); 942 943 if (!ipvlan_is_valid_dev(dev)) 944 return NOTIFY_DONE; 945 946 switch (event) { 947 case NETDEV_UP: 948 if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) { 949 NL_SET_ERR_MSG(i6vi->extack, 950 "Address already assigned to an ipvlan device"); 951 return notifier_from_errno(-EADDRINUSE); 952 } 953 break; 954 } 955 956 return NOTIFY_OK; 957 } 958 #endif 959 960 static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) 961 { 962 int ret = -EINVAL; 963 964 spin_lock_bh(&ipvlan->addrs_lock); 965 if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) 966 netif_err(ipvlan, ifup, ipvlan->dev, 967 "Failed to add IPv4=%pI4 on %s intf.\n", 968 ip4_addr, ipvlan->dev->name); 969 else 970 ret = ipvlan_add_addr(ipvlan, ip4_addr, false); 971 spin_unlock_bh(&ipvlan->addrs_lock); 972 return ret; 973 } 974 975 static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) 976 { 977 return ipvlan_del_addr(ipvlan, ip4_addr, false); 978 } 979 980 static int ipvlan_addr4_event(struct notifier_block *unused, 981 unsigned long event, void *ptr) 982 { 983 struct in_ifaddr *if4 = (struct in_ifaddr *)ptr; 984 struct net_device *dev = (struct net_device *)if4->ifa_dev->dev; 985 struct ipvl_dev *ipvlan = netdev_priv(dev); 986 struct in_addr ip4_addr; 987 988 if (!ipvlan_is_valid_dev(dev)) 989 return NOTIFY_DONE; 990 991 switch (event) { 992 case NETDEV_UP: 993 ip4_addr.s_addr = if4->ifa_address; 994 if (ipvlan_add_addr4(ipvlan, &ip4_addr)) 995 return NOTIFY_BAD; 996 break; 997 998 case NETDEV_DOWN: 999 ip4_addr.s_addr = if4->ifa_address; 1000 ipvlan_del_addr4(ipvlan, &ip4_addr); 1001 break; 1002 } 1003 1004 return NOTIFY_OK; 1005 } 1006 1007 static int ipvlan_addr4_validator_event(struct notifier_block *unused, 1008 unsigned long event, void *ptr) 1009 { 1010 struct in_validator_info *ivi = (struct in_validator_info *)ptr; 1011 struct net_device *dev = (struct net_device *)ivi->ivi_dev->dev; 1012 struct ipvl_dev *ipvlan = netdev_priv(dev); 1013 1014 if (!ipvlan_is_valid_dev(dev)) 1015 return NOTIFY_DONE; 1016 1017 switch (event) { 1018 case NETDEV_UP: 1019 if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) { 1020 NL_SET_ERR_MSG(ivi->extack, 1021 "Address already assigned to an ipvlan device"); 1022 return notifier_from_errno(-EADDRINUSE); 1023 } 1024 break; 1025 } 1026 1027 return NOTIFY_OK; 1028 } 1029 1030 static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = { 1031 .notifier_call = ipvlan_addr4_event, 1032 }; 1033 1034 static struct notifier_block ipvlan_addr4_vtor_notifier_block __read_mostly = { 1035 .notifier_call = ipvlan_addr4_validator_event, 1036 }; 1037 1038 static struct notifier_block ipvlan_notifier_block __read_mostly = { 1039 .notifier_call = ipvlan_device_event, 1040 }; 1041 1042 #if IS_ENABLED(CONFIG_IPV6) 1043 static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = { 1044 .notifier_call = ipvlan_addr6_event, 1045 }; 1046 1047 static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = { 1048 .notifier_call = ipvlan_addr6_validator_event, 1049 }; 1050 #endif 1051 1052 static void ipvlan_ns_exit(struct net *net) 1053 { 1054 struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); 1055 1056 if (WARN_ON_ONCE(vnet->ipvl_nf_hook_refcnt)) { 1057 vnet->ipvl_nf_hook_refcnt = 0; 1058 nf_unregister_net_hooks(net, ipvl_nfops, 1059 ARRAY_SIZE(ipvl_nfops)); 1060 } 1061 } 1062 1063 static struct pernet_operations ipvlan_net_ops = { 1064 .id = &ipvlan_netid, 1065 .size = sizeof(struct ipvlan_netns), 1066 .exit = ipvlan_ns_exit, 1067 }; 1068 1069 static int __init ipvlan_init_module(void) 1070 { 1071 int err; 1072 1073 ipvlan_init_secret(); 1074 register_netdevice_notifier(&ipvlan_notifier_block); 1075 #if IS_ENABLED(CONFIG_IPV6) 1076 register_inet6addr_notifier(&ipvlan_addr6_notifier_block); 1077 register_inet6addr_validator_notifier( 1078 &ipvlan_addr6_vtor_notifier_block); 1079 #endif 1080 register_inetaddr_notifier(&ipvlan_addr4_notifier_block); 1081 register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block); 1082 1083 err = register_pernet_subsys(&ipvlan_net_ops); 1084 if (err < 0) 1085 goto error; 1086 1087 err = ipvlan_link_register(&ipvlan_link_ops); 1088 if (err < 0) { 1089 unregister_pernet_subsys(&ipvlan_net_ops); 1090 goto error; 1091 } 1092 1093 return 0; 1094 error: 1095 unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); 1096 unregister_inetaddr_validator_notifier( 1097 &ipvlan_addr4_vtor_notifier_block); 1098 #if IS_ENABLED(CONFIG_IPV6) 1099 unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block); 1100 unregister_inet6addr_validator_notifier( 1101 &ipvlan_addr6_vtor_notifier_block); 1102 #endif 1103 unregister_netdevice_notifier(&ipvlan_notifier_block); 1104 return err; 1105 } 1106 1107 static void __exit ipvlan_cleanup_module(void) 1108 { 1109 rtnl_link_unregister(&ipvlan_link_ops); 1110 unregister_pernet_subsys(&ipvlan_net_ops); 1111 unregister_netdevice_notifier(&ipvlan_notifier_block); 1112 unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); 1113 unregister_inetaddr_validator_notifier( 1114 &ipvlan_addr4_vtor_notifier_block); 1115 #if IS_ENABLED(CONFIG_IPV6) 1116 unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block); 1117 unregister_inet6addr_validator_notifier( 1118 &ipvlan_addr6_vtor_notifier_block); 1119 #endif 1120 } 1121 1122 module_init(ipvlan_init_module); 1123 module_exit(ipvlan_cleanup_module); 1124 1125 MODULE_LICENSE("GPL"); 1126 MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>"); 1127 MODULE_DESCRIPTION("Driver for L3 (IPv6/IPv4) based VLANs"); 1128 MODULE_ALIAS_RTNL_LINK("ipvlan"); 1129